From 77a5770a897c5d647308c2449bb46c64bbd5e855 Mon Sep 17 00:00:00 2001
From: Adrian Galvan <adrian@ethyca.com>
Date: Tue, 10 Dec 2024 10:52:32 -0800
Subject: [PATCH] Splitting out query configs and tests

---
 .../query_configs/mongodb_query_config.py     |   2 +-
 .../connectors/query_configs/query_config.py  |  17 +-
 .../connectors/test_dynamodb_query_config.py  | 129 ++++++
 .../connectors/test_mongo_query_config.py     | 283 ++++++++++++
 .../service/connectors/test_query_config.py   | 431 +-----------------
 .../connectors/test_scylladb_query_config.py  |  47 ++
 tests/ops/task/traversal_data.py              |  79 +++-
 7 files changed, 537 insertions(+), 451 deletions(-)
 create mode 100644 tests/ops/service/connectors/test_dynamodb_query_config.py
 create mode 100644 tests/ops/service/connectors/test_mongo_query_config.py
 create mode 100644 tests/ops/service/connectors/test_scylladb_query_config.py

diff --git a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
index edb57599db..1a6aa303f0 100644
--- a/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
+++ b/src/fides/api/service/connectors/query_configs/mongodb_query_config.py
@@ -72,7 +72,7 @@ def generate_update_stmt(
         where_clauses: Dict[str, Any] = filter_nonempty_values(
             {
                 field_path.string_path: field.cast(row[field_path.string_path])
-                for field_path, field in self.incoming_field_paths.items()
+                for field_path, field in self.primary_key_field_paths.items()
             }
         )
 
diff --git a/src/fides/api/service/connectors/query_configs/query_config.py b/src/fides/api/service/connectors/query_configs/query_config.py
index 2026fe0b0f..4ef115d910 100644
--- a/src/fides/api/service/connectors/query_configs/query_config.py
+++ b/src/fides/api/service/connectors/query_configs/query_config.py
@@ -101,7 +101,7 @@ def primary_key_field_paths(self) -> Dict[FieldPath, Field]:
         }
 
     @property
-    def incoming_field_paths(self) -> Dict[FieldPath, Field]:
+    def reference_field_paths(self) -> Dict[FieldPath, Field]:
         """Mapping of FieldPaths to Fields that have incoming identity or dataset references"""
         return {
             field_path: field
@@ -447,10 +447,19 @@ def generate_update_stmt(
     ) -> Optional[T]:
         """Returns an update statement in generic SQL-ish dialect."""
         update_value_map: Dict[str, Any] = self.update_value_map(row, policy, request)
+
+        non_empty_primary_key_fields: Dict[str, Field] = filter_nonempty_values(
+            {
+                fpath.string_path: fld.cast(row[fpath.string_path])
+                for fpath, fld in self.primary_key_field_paths.items()
+                if fpath.string_path in row
+            }
+        )
+
         non_empty_reference_fields: Dict[str, Field] = filter_nonempty_values(
             {
                 fpath.string_path: fld.cast(row[fpath.string_path])
-                for fpath, fld in self.incoming_field_paths.items()
+                for fpath, fld in self.reference_field_paths.items()
                 if fpath.string_path in row
             }
         )
@@ -463,10 +472,10 @@ def generate_update_stmt(
 
         update_clauses = self.get_update_clauses(
             {k: f"masked_{k}" for k in update_value_map},
-            non_empty_reference_fields,
+            non_empty_primary_key_fields or non_empty_reference_fields,
         )
         where_clauses = self.format_key_map_for_update_stmt(
-            {k: k for k in non_empty_reference_fields}
+            {k: k for k in non_empty_primary_key_fields or non_empty_reference_fields}
         )
 
         valid = len(where_clauses) > 0 and len(update_clauses) > 0
diff --git a/tests/ops/service/connectors/test_dynamodb_query_config.py b/tests/ops/service/connectors/test_dynamodb_query_config.py
new file mode 100644
index 0000000000..4591ae9385
--- /dev/null
+++ b/tests/ops/service/connectors/test_dynamodb_query_config.py
@@ -0,0 +1,129 @@
+from datetime import datetime, timezone
+
+import pytest
+from boto3.dynamodb.types import TypeDeserializer
+from fideslang.models import Dataset
+
+from fides.api.graph.config import CollectionAddress
+from fides.api.graph.graph import DatasetGraph
+from fides.api.graph.traversal import Traversal
+from fides.api.models.datasetconfig import convert_dataset_to_graph
+from fides.api.models.privacy_request import PrivacyRequest
+from fides.api.service.connectors.query_configs.dynamodb_query_config import (
+    DynamoDBQueryConfig,
+)
+
+privacy_request = PrivacyRequest(id="234544")
+
+
+class TestDynamoDBQueryConfig:
+    @pytest.fixture(scope="function")
+    def identity(self):
+        identity = {"email": "customer-test_uuid@example.com"}
+        return identity
+
+    @pytest.fixture(scope="function")
+    def dataset_graph(self, integration_dynamodb_config, example_datasets):
+        dataset = Dataset(**example_datasets[11])
+        dataset_graph = convert_dataset_to_graph(
+            dataset, integration_dynamodb_config.key
+        )
+
+        return DatasetGraph(*[dataset_graph])
+
+    @pytest.fixture(scope="function")
+    def traversal(self, identity, dataset_graph):
+        dynamo_traversal = Traversal(dataset_graph, identity)
+        return dynamo_traversal
+
+    @pytest.fixture(scope="function")
+    def customer_node(self, traversal):
+        return traversal.traversal_node_dict[
+            CollectionAddress("dynamodb_example_test_dataset", "customer")
+        ].to_mock_execution_node()
+
+    @pytest.fixture(scope="function")
+    def customer_identifier_node(self, traversal):
+        return traversal.traversal_node_dict[
+            CollectionAddress("dynamodb_example_test_dataset", "customer_identifier")
+        ].to_mock_execution_node()
+
+    @pytest.fixture(scope="function")
+    def customer_row(self):
+        row = {
+            "customer_email": {"S": "customer-1@example.com"},
+            "name": {"S": "John Customer"},
+            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
+            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
+            "id": {"S": "1"},
+        }
+        return row
+
+    @pytest.fixture(scope="function")
+    def deserialized_customer_row(self, customer_row):
+        deserialized_customer_row = {}
+        deserializer = TypeDeserializer()
+        for key, value in customer_row.items():
+            deserialized_customer_row[key] = deserializer.deserialize(value)
+        return deserialized_customer_row
+
+    @pytest.fixture(scope="function")
+    def customer_identifier_row(self):
+        row = {
+            "customer_id": {"S": "customer-1@example.com"},
+            "email": {"S": "customer-1@example.com"},
+            "name": {"S": "Customer 1"},
+            "created": {"S": datetime.now(timezone.utc).isoformat()},
+        }
+        return row
+
+    @pytest.fixture(scope="function")
+    def deserialized_customer_identifier_row(self, customer_identifier_row):
+        deserialized_customer_identifier_row = {}
+        deserializer = TypeDeserializer()
+        for key, value in customer_identifier_row.items():
+            deserialized_customer_identifier_row[key] = deserializer.deserialize(value)
+        return deserialized_customer_identifier_row
+
+    def test_get_query_param_formatting_single_key(
+        self,
+        resources_dict,
+        customer_node,
+    ) -> None:
+        input_data = {
+            "fidesops_grouped_inputs": [],
+            "email": ["customer-test_uuid@example.com"],
+        }
+        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
+        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
+        item = query_config.generate_query(
+            input_data=input_data, policy=resources_dict["policy"]
+        )
+        assert item["ExpressionAttributeValues"] == {
+            ":value": {"S": "customer-test_uuid@example.com"}
+        }
+        assert item["KeyConditionExpression"] == "email = :value"
+
+    def test_put_query_param_formatting_single_key(
+        self,
+        erasure_policy,
+        customer_node,
+        deserialized_customer_row,
+    ) -> None:
+        input_data = {
+            "fidesops_grouped_inputs": [],
+            "email": ["customer-test_uuid@example.com"],
+        }
+        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
+        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
+        update_item = query_config.generate_update_stmt(
+            deserialized_customer_row, erasure_policy, privacy_request
+        )
+
+        assert update_item == {
+            "customer_email": {"S": "customer-1@example.com"},
+            "name": {"NULL": True},
+            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
+            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
+            "id": {"S": "1"},
+        }
diff --git a/tests/ops/service/connectors/test_mongo_query_config.py b/tests/ops/service/connectors/test_mongo_query_config.py
new file mode 100644
index 0000000000..3912618801
--- /dev/null
+++ b/tests/ops/service/connectors/test_mongo_query_config.py
@@ -0,0 +1,283 @@
+import pytest
+from fideslang.models import Dataset
+
+from fides.api.graph.config import (
+    CollectionAddress,
+    FieldAddress,
+    FieldPath,
+    ObjectField,
+    ScalarField,
+)
+from fides.api.graph.graph import DatasetGraph, Edge
+from fides.api.graph.traversal import Traversal
+from fides.api.models.datasetconfig import convert_dataset_to_graph
+from fides.api.models.privacy_request import PrivacyRequest
+from fides.api.schemas.masking.masking_configuration import HashMaskingConfiguration
+from fides.api.schemas.masking.masking_secrets import MaskingSecretCache, SecretType
+from fides.api.service.connectors.query_configs.mongodb_query_config import (
+    MongoQueryConfig,
+)
+from fides.api.service.masking.strategy.masking_strategy_hash import HashMaskingStrategy
+from fides.api.util.data_category import DataCategory
+
+from ...task.traversal_data import combined_mongo_postgresql_graph
+from ...test_helpers.cache_secrets_helper import cache_secret
+
+privacy_request = PrivacyRequest(id="234544")
+
+
+class TestMongoQueryConfig:
+    @pytest.fixture(scope="function")
+    def combined_traversal(self, connection_config, integration_mongodb_config):
+        mongo_dataset, postgres_dataset = combined_mongo_postgresql_graph(
+            connection_config, integration_mongodb_config
+        )
+        combined_dataset_graph = DatasetGraph(mongo_dataset, postgres_dataset)
+        combined_traversal = Traversal(
+            combined_dataset_graph,
+            {"email": "customer-1@examplecom"},
+        )
+        return combined_traversal
+
+    @pytest.fixture(scope="function")
+    def customer_details_node(self, combined_traversal):
+        return combined_traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_details")
+        ].to_mock_execution_node()
+
+    @pytest.fixture(scope="function")
+    def customer_feedback_node(self, combined_traversal):
+        return combined_traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_feedback")
+        ].to_mock_execution_node()
+
+    def test_field_map_nested(self, customer_details_node):
+        config = MongoQueryConfig(customer_details_node)
+
+        field_map = config.field_map()
+        assert isinstance(field_map[FieldPath("workplace_info")], ObjectField)
+        assert isinstance(
+            field_map[FieldPath("workplace_info", "employer")], ScalarField
+        )
+
+    def test_primary_key_field_paths(self, customer_details_node):
+        config = MongoQueryConfig(customer_details_node)
+        assert list(config.primary_key_field_paths.keys()) == [FieldPath("_id")]
+        assert isinstance(config.primary_key_field_paths[FieldPath("_id")], ScalarField)
+
+    def test_nested_query_field_paths(
+        self, customer_details_node, customer_feedback_node
+    ):
+        assert customer_details_node.query_field_paths == {
+            FieldPath("customer_id"),
+        }
+
+        assert customer_feedback_node.query_field_paths == {
+            FieldPath("customer_information", "email")
+        }
+
+    def test_nested_typed_filtered_values(self, customer_feedback_node):
+        """Identity data is located on a nested object"""
+        input_data = {
+            "customer_information.email": ["test@example.com"],
+            "ignore": ["abcde"],
+        }
+        assert customer_feedback_node.typed_filtered_values(input_data) == {
+            "customer_information.email": ["test@example.com"]
+        }
+
+    def test_generate_query(
+        self,
+        policy,
+        example_datasets,
+        integration_mongodb_config,
+        connection_config,
+    ):
+        dataset_postgres = Dataset(**example_datasets[0])
+        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
+        dataset_mongo = Dataset(**example_datasets[1])
+        mongo_graph = convert_dataset_to_graph(
+            dataset_mongo, integration_mongodb_config.key
+        )
+        dataset_graph = DatasetGraph(*[graph, mongo_graph])
+        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
+        # Edge created from Root to nested customer_information.email field
+        assert (
+            Edge(
+                FieldAddress("__ROOT__", "__ROOT__", "email"),
+                FieldAddress(
+                    "mongo_test", "customer_feedback", "customer_information", "email"
+                ),
+            )
+            in traversal.edges
+        )
+
+        # Test query on nested field
+        customer_feedback = traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_feedback")
+        ].to_mock_execution_node()
+        config = MongoQueryConfig(customer_feedback)
+        input_data = {"customer_information.email": ["customer-1@example.com"]}
+        # Tuple of query, projection - Searching for documents with nested
+        # customer_information.email = customer-1@example.com
+        assert config.generate_query(input_data, policy) == (
+            {"customer_information.email": "customer-1@example.com"},
+            {"_id": 1, "customer_information": 1, "date": 1, "message": 1, "rating": 1},
+        )
+
+        # Test query nested data
+        customer_details = traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_details")
+        ].to_mock_execution_node()
+        config = MongoQueryConfig(customer_details)
+        input_data = {"customer_id": [1]}
+        # Tuple of query, projection - Projection is specifying fields at the top-level. Nested data will
+        # be filtered later.
+        assert config.generate_query(input_data, policy) == (
+            {"customer_id": 1},
+            {
+                "_id": 1,
+                "birthday": 1,
+                "comments": 1,
+                "customer_id": 1,
+                "customer_uuid": 1,
+                "emergency_contacts": 1,
+                "children": 1,
+                "gender": 1,
+                "travel_identifiers": 1,
+                "workplace_info": 1,
+            },
+        )
+
+    def test_generate_update_stmt_multiple_fields(
+        self,
+        erasure_policy,
+        example_datasets,
+        integration_mongodb_config,
+        connection_config,
+    ):
+        dataset_postgres = Dataset(**example_datasets[0])
+        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
+        dataset_mongo = Dataset(**example_datasets[1])
+        mongo_graph = convert_dataset_to_graph(
+            dataset_mongo, integration_mongodb_config.key
+        )
+        dataset_graph = DatasetGraph(*[graph, mongo_graph])
+
+        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
+        customer_details = traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_details")
+        ].to_mock_execution_node()
+        config = MongoQueryConfig(customer_details)
+        row = {
+            "birthday": "1988-01-10",
+            "gender": "male",
+            "customer_id": 1,
+            "_id": 1,
+            "workplace_info": {
+                "position": "Chief Strategist",
+                "direct_reports": ["Robbie Margo", "Sully Hunter"],
+            },
+            "emergency_contacts": [{"name": "June Customer", "phone": "444-444-4444"}],
+            "children": ["Christopher Customer", "Courtney Customer"],
+        }
+
+        # Make target more broad
+        rule = erasure_policy.rules[0]
+        target = rule.targets[0]
+        target.data_category = DataCategory("user").value
+
+        mongo_statement = config.generate_update_stmt(
+            row, erasure_policy, privacy_request
+        )
+
+        expected_result_0 = {"customer_id": 1}
+        expected_result_1 = {
+            "$set": {
+                "birthday": None,
+                "children.0": None,
+                "children.1": None,
+                "customer_id": None,
+                "emergency_contacts.0.name": None,
+                "workplace_info.direct_reports.0": None,  # Both direct reports are masked.
+                "workplace_info.direct_reports.1": None,
+                "emergency_contacts.0.phone": None,
+                "gender": None,
+                "workplace_info.position": None,
+            }
+        }
+
+        print(mongo_statement[1])
+        print(expected_result_1)
+        assert mongo_statement[0] == expected_result_0
+        assert mongo_statement[1] == expected_result_1
+
+    def test_generate_update_stmt_multiple_rules(
+        self,
+        erasure_policy_two_rules,
+        example_datasets,
+        integration_mongodb_config,
+        connection_config,
+    ):
+        dataset_postgres = Dataset(**example_datasets[0])
+        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
+        dataset_mongo = Dataset(**example_datasets[1])
+        mongo_graph = convert_dataset_to_graph(
+            dataset_mongo, integration_mongodb_config.key
+        )
+        dataset_graph = DatasetGraph(*[graph, mongo_graph])
+
+        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
+
+        customer_details = traversal.traversal_node_dict[
+            CollectionAddress("mongo_test", "customer_details")
+        ].to_mock_execution_node()
+
+        config = MongoQueryConfig(customer_details)
+        row = {
+            "birthday": "1988-01-10",
+            "gender": "male",
+            "customer_id": 1,
+            "_id": 1,
+            "workplace_info": {
+                "position": "Chief Strategist",
+                "direct_reports": ["Robbie Margo", "Sully Hunter"],
+            },
+            "emergency_contacts": [{"name": "June Customer", "phone": "444-444-4444"}],
+            "children": ["Christopher Customer", "Courtney Customer"],
+        }
+
+        rule = erasure_policy_two_rules.rules[0]
+        rule.masking_strategy = {
+            "strategy": "hash",
+            "configuration": {"algorithm": "SHA-512"},
+        }
+        target = rule.targets[0]
+        target.data_category = DataCategory("user.demographic.date_of_birth").value
+
+        rule_two = erasure_policy_two_rules.rules[1]
+        rule_two.masking_strategy = {
+            "strategy": "random_string_rewrite",
+            "configuration": {"length": 30},
+        }
+        target = rule_two.targets[0]
+        target.data_category = DataCategory("user.demographic.gender").value
+        # cache secrets for hash strategy
+        secret = MaskingSecretCache[str](
+            secret="adobo",
+            masking_strategy=HashMaskingStrategy.name,
+            secret_type=SecretType.salt,
+        )
+        cache_secret(secret, privacy_request.id)
+
+        mongo_statement = config.generate_update_stmt(
+            row, erasure_policy_two_rules, privacy_request
+        )
+        assert mongo_statement[0] == {"customer_id": 1}
+        assert len(mongo_statement[1]["$set"]["gender"]) == 30
+        assert (
+            mongo_statement[1]["$set"]["birthday"]
+            == HashMaskingStrategy(HashMaskingConfiguration(algorithm="SHA-512")).mask(
+                ["1988-01-10"], request_id=privacy_request.id
+            )[0]
+        )
diff --git a/tests/ops/service/connectors/test_query_config.py b/tests/ops/service/connectors/test_query_config.py
index 75c9b26c1b..2aa0871255 100644
--- a/tests/ops/service/connectors/test_query_config.py
+++ b/tests/ops/service/connectors/test_query_config.py
@@ -1,43 +1,28 @@
-from datetime import datetime, timezone
 from typing import Any, Dict, Set
 from unittest import mock
 
 import pytest
-from boto3.dynamodb.types import TypeDeserializer
 from fideslang.models import Dataset
 
 from fides.api.common_exceptions import MissingNamespaceSchemaException
-from fides.api.graph.config import (
-    CollectionAddress,
-    FieldAddress,
-    FieldPath,
-    ObjectField,
-    ScalarField,
-)
+from fides.api.graph.config import CollectionAddress, FieldPath
 from fides.api.graph.execution import ExecutionNode
-from fides.api.graph.graph import DatasetGraph, Edge
+from fides.api.graph.graph import DatasetGraph
 from fides.api.graph.traversal import Traversal, TraversalNode
 from fides.api.models.datasetconfig import convert_dataset_to_graph
 from fides.api.models.privacy_request import PrivacyRequest
 from fides.api.schemas.masking.masking_configuration import HashMaskingConfiguration
 from fides.api.schemas.masking.masking_secrets import MaskingSecretCache, SecretType
 from fides.api.schemas.namespace_meta.namespace_meta import NamespaceMeta
-from fides.api.service.connectors.query_configs.dynamodb_query_config import (
-    DynamoDBQueryConfig,
-)
-from fides.api.service.connectors.query_configs.mongodb_query_config import (
-    MongoQueryConfig,
-)
 from fides.api.service.connectors.query_configs.query_config import (
     QueryConfig,
     SQLQueryConfig,
 )
-from fides.api.service.connectors.scylla_query_config import ScyllaDBQueryConfig
 from fides.api.service.masking.strategy.masking_strategy_hash import HashMaskingStrategy
 from fides.api.util.data_category import DataCategory
 from tests.fixtures.application_fixtures import load_dataset
 
-from ...task.traversal_data import combined_mongo_postgresql_graph, integration_db_graph
+from ...task.traversal_data import integration_db_graph
 from ...test_helpers.cache_secrets_helper import cache_secret, clear_cache_secrets
 
 # customers -> address, order
@@ -461,416 +446,6 @@ def test_generate_update_stmts_from_multiple_rules(
             text_clause._bindparams["masked_email"].value == "*****"
         )  # String rewrite masking strategy
 
-
-class TestMongoQueryConfig:
-    @pytest.fixture(scope="function")
-    def combined_traversal(self, connection_config, integration_mongodb_config):
-        mongo_dataset, postgres_dataset = combined_mongo_postgresql_graph(
-            connection_config, integration_mongodb_config
-        )
-        combined_dataset_graph = DatasetGraph(mongo_dataset, postgres_dataset)
-        combined_traversal = Traversal(
-            combined_dataset_graph,
-            {"email": "customer-1@examplecom"},
-        )
-        return combined_traversal
-
-    @pytest.fixture(scope="function")
-    def customer_details_node(self, combined_traversal):
-        return combined_traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_details")
-        ].to_mock_execution_node()
-
-    @pytest.fixture(scope="function")
-    def customer_feedback_node(self, combined_traversal):
-        return combined_traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_feedback")
-        ].to_mock_execution_node()
-
-    def test_field_map_nested(self, customer_details_node):
-        config = MongoQueryConfig(customer_details_node)
-
-        field_map = config.field_map()
-        assert isinstance(field_map[FieldPath("workplace_info")], ObjectField)
-        assert isinstance(
-            field_map[FieldPath("workplace_info", "employer")], ScalarField
-        )
-
-    def test_primary_key_field_paths(self, customer_details_node):
-        config = MongoQueryConfig(customer_details_node)
-        assert list(config.primary_key_field_paths.keys()) == [FieldPath("_id")]
-        assert isinstance(config.primary_key_field_paths[FieldPath("_id")], ScalarField)
-
-    def test_nested_query_field_paths(
-        self, customer_details_node, customer_feedback_node
-    ):
-        assert customer_details_node.query_field_paths == {
-            FieldPath("customer_id"),
-        }
-
-        assert customer_feedback_node.query_field_paths == {
-            FieldPath("customer_information", "email")
-        }
-
-    def test_nested_typed_filtered_values(self, customer_feedback_node):
-        """Identity data is located on a nested object"""
-        input_data = {
-            "customer_information.email": ["test@example.com"],
-            "ignore": ["abcde"],
-        }
-        assert customer_feedback_node.typed_filtered_values(input_data) == {
-            "customer_information.email": ["test@example.com"]
-        }
-
-    def test_generate_query(
-        self,
-        policy,
-        example_datasets,
-        integration_mongodb_config,
-        connection_config,
-    ):
-        dataset_postgres = Dataset(**example_datasets[0])
-        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
-        dataset_mongo = Dataset(**example_datasets[1])
-        mongo_graph = convert_dataset_to_graph(
-            dataset_mongo, integration_mongodb_config.key
-        )
-        dataset_graph = DatasetGraph(*[graph, mongo_graph])
-        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
-        # Edge created from Root to nested customer_information.email field
-        assert (
-            Edge(
-                FieldAddress("__ROOT__", "__ROOT__", "email"),
-                FieldAddress(
-                    "mongo_test", "customer_feedback", "customer_information", "email"
-                ),
-            )
-            in traversal.edges
-        )
-
-        # Test query on nested field
-        customer_feedback = traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_feedback")
-        ].to_mock_execution_node()
-        config = MongoQueryConfig(customer_feedback)
-        input_data = {"customer_information.email": ["customer-1@example.com"]}
-        # Tuple of query, projection - Searching for documents with nested
-        # customer_information.email = customer-1@example.com
-        assert config.generate_query(input_data, policy) == (
-            {"customer_information.email": "customer-1@example.com"},
-            {"_id": 1, "customer_information": 1, "date": 1, "message": 1, "rating": 1},
-        )
-
-        # Test query nested data
-        customer_details = traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_details")
-        ].to_mock_execution_node()
-        config = MongoQueryConfig(customer_details)
-        input_data = {"customer_id": [1]}
-        # Tuple of query, projection - Projection is specifying fields at the top-level. Nested data will
-        # be filtered later.
-        assert config.generate_query(input_data, policy) == (
-            {"customer_id": 1},
-            {
-                "_id": 1,
-                "birthday": 1,
-                "comments": 1,
-                "customer_id": 1,
-                "customer_uuid": 1,
-                "emergency_contacts": 1,
-                "children": 1,
-                "gender": 1,
-                "travel_identifiers": 1,
-                "workplace_info": 1,
-            },
-        )
-
-    def test_generate_update_stmt_multiple_fields(
-        self,
-        erasure_policy,
-        example_datasets,
-        integration_mongodb_config,
-        connection_config,
-    ):
-        dataset_postgres = Dataset(**example_datasets[0])
-        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
-        dataset_mongo = Dataset(**example_datasets[1])
-        mongo_graph = convert_dataset_to_graph(
-            dataset_mongo, integration_mongodb_config.key
-        )
-        dataset_graph = DatasetGraph(*[graph, mongo_graph])
-
-        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
-        customer_details = traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_details")
-        ].to_mock_execution_node()
-        config = MongoQueryConfig(customer_details)
-        row = {
-            "birthday": "1988-01-10",
-            "gender": "male",
-            "customer_id": 1,
-            "_id": 1,
-            "workplace_info": {
-                "position": "Chief Strategist",
-                "direct_reports": ["Robbie Margo", "Sully Hunter"],
-            },
-            "emergency_contacts": [{"name": "June Customer", "phone": "444-444-4444"}],
-            "children": ["Christopher Customer", "Courtney Customer"],
-        }
-
-        # Make target more broad
-        rule = erasure_policy.rules[0]
-        target = rule.targets[0]
-        target.data_category = DataCategory("user").value
-
-        mongo_statement = config.generate_update_stmt(
-            row, erasure_policy, privacy_request
-        )
-
-        expected_result_0 = {"customer_id": 1}
-        expected_result_1 = {
-            "$set": {
-                "birthday": None,
-                "children.0": None,
-                "children.1": None,
-                "customer_id": None,
-                "emergency_contacts.0.name": None,
-                "workplace_info.direct_reports.0": None,  # Both direct reports are masked.
-                "workplace_info.direct_reports.1": None,
-                "emergency_contacts.0.phone": None,
-                "gender": None,
-                "workplace_info.position": None,
-            }
-        }
-
-        print(mongo_statement[1])
-        print(expected_result_1)
-        assert mongo_statement[0] == expected_result_0
-        assert mongo_statement[1] == expected_result_1
-
-    def test_generate_update_stmt_multiple_rules(
-        self,
-        erasure_policy_two_rules,
-        example_datasets,
-        integration_mongodb_config,
-        connection_config,
-    ):
-        dataset_postgres = Dataset(**example_datasets[0])
-        graph = convert_dataset_to_graph(dataset_postgres, connection_config.key)
-        dataset_mongo = Dataset(**example_datasets[1])
-        mongo_graph = convert_dataset_to_graph(
-            dataset_mongo, integration_mongodb_config.key
-        )
-        dataset_graph = DatasetGraph(*[graph, mongo_graph])
-
-        traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"})
-
-        customer_details = traversal.traversal_node_dict[
-            CollectionAddress("mongo_test", "customer_details")
-        ].to_mock_execution_node()
-
-        config = MongoQueryConfig(customer_details)
-        row = {
-            "birthday": "1988-01-10",
-            "gender": "male",
-            "customer_id": 1,
-            "_id": 1,
-            "workplace_info": {
-                "position": "Chief Strategist",
-                "direct_reports": ["Robbie Margo", "Sully Hunter"],
-            },
-            "emergency_contacts": [{"name": "June Customer", "phone": "444-444-4444"}],
-            "children": ["Christopher Customer", "Courtney Customer"],
-        }
-
-        rule = erasure_policy_two_rules.rules[0]
-        rule.masking_strategy = {
-            "strategy": "hash",
-            "configuration": {"algorithm": "SHA-512"},
-        }
-        target = rule.targets[0]
-        target.data_category = DataCategory("user.demographic.date_of_birth").value
-
-        rule_two = erasure_policy_two_rules.rules[1]
-        rule_two.masking_strategy = {
-            "strategy": "random_string_rewrite",
-            "configuration": {"length": 30},
-        }
-        target = rule_two.targets[0]
-        target.data_category = DataCategory("user.demographic.gender").value
-        # cache secrets for hash strategy
-        secret = MaskingSecretCache[str](
-            secret="adobo",
-            masking_strategy=HashMaskingStrategy.name,
-            secret_type=SecretType.salt,
-        )
-        cache_secret(secret, privacy_request.id)
-
-        mongo_statement = config.generate_update_stmt(
-            row, erasure_policy_two_rules, privacy_request
-        )
-        assert mongo_statement[0] == {"customer_id": 1}
-        assert len(mongo_statement[1]["$set"]["gender"]) == 30
-        assert (
-            mongo_statement[1]["$set"]["birthday"]
-            == HashMaskingStrategy(HashMaskingConfiguration(algorithm="SHA-512")).mask(
-                ["1988-01-10"], request_id=privacy_request.id
-            )[0]
-        )
-
-
-class TestDynamoDBQueryConfig:
-    @pytest.fixture(scope="function")
-    def identity(self):
-        identity = {"email": "customer-test_uuid@example.com"}
-        return identity
-
-    @pytest.fixture(scope="function")
-    def dataset_graph(self, integration_dynamodb_config, example_datasets):
-        dataset = Dataset(**example_datasets[11])
-        dataset_graph = convert_dataset_to_graph(
-            dataset, integration_dynamodb_config.key
-        )
-
-        return DatasetGraph(*[dataset_graph])
-
-    @pytest.fixture(scope="function")
-    def traversal(self, identity, dataset_graph):
-        dynamo_traversal = Traversal(dataset_graph, identity)
-        return dynamo_traversal
-
-    @pytest.fixture(scope="function")
-    def customer_node(self, traversal):
-        return traversal.traversal_node_dict[
-            CollectionAddress("dynamodb_example_test_dataset", "customer")
-        ].to_mock_execution_node()
-
-    @pytest.fixture(scope="function")
-    def customer_identifier_node(self, traversal):
-        return traversal.traversal_node_dict[
-            CollectionAddress("dynamodb_example_test_dataset", "customer_identifier")
-        ].to_mock_execution_node()
-
-    @pytest.fixture(scope="function")
-    def customer_row(self):
-        row = {
-            "customer_email": {"S": "customer-1@example.com"},
-            "name": {"S": "John Customer"},
-            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
-            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
-            "id": {"S": "1"},
-        }
-        return row
-
-    @pytest.fixture(scope="function")
-    def deserialized_customer_row(self, customer_row):
-        deserialized_customer_row = {}
-        deserializer = TypeDeserializer()
-        for key, value in customer_row.items():
-            deserialized_customer_row[key] = deserializer.deserialize(value)
-        return deserialized_customer_row
-
-    @pytest.fixture(scope="function")
-    def customer_identifier_row(self):
-        row = {
-            "customer_id": {"S": "customer-1@example.com"},
-            "email": {"S": "customer-1@example.com"},
-            "name": {"S": "Customer 1"},
-            "created": {"S": datetime.now(timezone.utc).isoformat()},
-        }
-        return row
-
-    @pytest.fixture(scope="function")
-    def deserialized_customer_identifier_row(self, customer_identifier_row):
-        deserialized_customer_identifier_row = {}
-        deserializer = TypeDeserializer()
-        for key, value in customer_identifier_row.items():
-            deserialized_customer_identifier_row[key] = deserializer.deserialize(value)
-        return deserialized_customer_identifier_row
-
-    def test_get_query_param_formatting_single_key(
-        self,
-        resources_dict,
-        customer_node,
-    ) -> None:
-        input_data = {
-            "fidesops_grouped_inputs": [],
-            "email": ["customer-test_uuid@example.com"],
-        }
-        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
-        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
-        item = query_config.generate_query(
-            input_data=input_data, policy=resources_dict["policy"]
-        )
-        assert item["ExpressionAttributeValues"] == {
-            ":value": {"S": "customer-test_uuid@example.com"}
-        }
-        assert item["KeyConditionExpression"] == "email = :value"
-
-    def test_put_query_param_formatting_single_key(
-        self,
-        erasure_policy,
-        customer_node,
-        deserialized_customer_row,
-    ) -> None:
-        input_data = {
-            "fidesops_grouped_inputs": [],
-            "email": ["customer-test_uuid@example.com"],
-        }
-        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
-        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
-        update_item = query_config.generate_update_stmt(
-            deserialized_customer_row, erasure_policy, privacy_request
-        )
-
-        assert update_item == {
-            "customer_email": {"S": "customer-1@example.com"},
-            "name": {"NULL": True},
-            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
-            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
-            "id": {"S": "1"},
-        }
-
-
-class TestScyllaDBQueryConfig:
-    @pytest.fixture(scope="function")
-    def complete_execution_node(
-        self, example_datasets, integration_scylladb_config_with_keyspace
-    ):
-        dataset = Dataset(**example_datasets[15])
-        graph = convert_dataset_to_graph(
-            dataset, integration_scylladb_config_with_keyspace.key
-        )
-        dataset_graph = DatasetGraph(*[graph])
-        identity = {"email": "customer-1@example.com"}
-        scylla_traversal = Traversal(dataset_graph, identity)
-        return scylla_traversal.traversal_node_dict[
-            CollectionAddress("scylladb_example_test_dataset", "users")
-        ].to_mock_execution_node()
-
-    def test_dry_run_query_no_data(self, scylladb_execution_node):
-        query_config = ScyllaDBQueryConfig(scylladb_execution_node)
-        dry_run_query = query_config.dry_run_query()
-        assert dry_run_query is None
-
-    def test_dry_run_query_with_data(self, complete_execution_node):
-        query_config = ScyllaDBQueryConfig(complete_execution_node)
-        dry_run_query = query_config.dry_run_query()
-        assert (
-            dry_run_query
-            == "SELECT age, alternative_contacts, ascii_data, big_int_data, do_not_contact, double_data, duration, email, float_data, last_contacted, logins, name, states_lived, timestamp, user_id, uuid FROM users WHERE email = ? ALLOW FILTERING;"
-        )
-
-    def test_query_to_str(self, complete_execution_node):
-        query_config = ScyllaDBQueryConfig(complete_execution_node)
-        statement = (
-            "SELECT name FROM users WHERE email = %(email)s",
-            {"email": "test@example.com"},
-        )
-        query_to_str = query_config.query_to_str(statement, {})
-        assert query_to_str == "SELECT name FROM users WHERE email = 'test@example.com'"
-
-
 class TestSQLLikeQueryConfig:
     def test_missing_namespace_meta_schema(self):
 
diff --git a/tests/ops/service/connectors/test_scylladb_query_config.py b/tests/ops/service/connectors/test_scylladb_query_config.py
new file mode 100644
index 0000000000..3cbc6f493f
--- /dev/null
+++ b/tests/ops/service/connectors/test_scylladb_query_config.py
@@ -0,0 +1,47 @@
+import pytest
+from fideslang.models import Dataset
+
+from fides.api.graph.config import CollectionAddress
+from fides.api.graph.graph import DatasetGraph
+from fides.api.graph.traversal import Traversal
+from fides.api.models.datasetconfig import convert_dataset_to_graph
+from fides.api.service.connectors.scylla_query_config import ScyllaDBQueryConfig
+
+
+class TestScyllaDBQueryConfig:
+    """Exercise query-string generation in ``ScyllaDBQueryConfig``."""
+
+    @pytest.fixture(scope="function")
+    def complete_execution_node(
+        self, example_datasets, integration_scylladb_config_with_keyspace
+    ):
+        """Build a mock execution node for the ``users`` collection."""
+        scylla_graph = convert_dataset_to_graph(
+            Dataset(**example_datasets[15]),
+            integration_scylladb_config_with_keyspace.key,
+        )
+        traversal = Traversal(
+            DatasetGraph(scylla_graph), {"email": "customer-1@example.com"}
+        )
+        users = CollectionAddress("scylladb_example_test_dataset", "users")
+        return traversal.traversal_node_dict[users].to_mock_execution_node()
+
+    def test_dry_run_query_no_data(self, scylladb_execution_node):
+        """``dry_run_query`` returns ``None`` for ``scylladb_execution_node``."""
+        assert ScyllaDBQueryConfig(scylladb_execution_node).dry_run_query() is None
+
+    def test_dry_run_query_with_data(self, complete_execution_node):
+        """``dry_run_query`` renders the expected SELECT with a ``?`` placeholder."""
+        expected = "SELECT age, alternative_contacts, ascii_data, big_int_data, do_not_contact, double_data, duration, email, float_data, last_contacted, logins, name, states_lived, timestamp, user_id, uuid FROM users WHERE email = ? ALLOW FILTERING;"
+        actual = ScyllaDBQueryConfig(complete_execution_node).dry_run_query()
+        assert actual == expected
+
+    def test_query_to_str(self, complete_execution_node):
+        """``query_to_str`` is expected to inline the bound ``email`` value."""
+        bound_statement = (
+            "SELECT name FROM users WHERE email = %(email)s",
+            {"email": "test@example.com"},
+        )
+        config = ScyllaDBQueryConfig(complete_execution_node)
+        rendered = config.query_to_str(bound_statement, {})
+        assert rendered == "SELECT name FROM users WHERE email = 'test@example.com'"
diff --git a/tests/ops/task/traversal_data.py b/tests/ops/task/traversal_data.py
index d0ef50ae18..07ff478e3e 100644
--- a/tests/ops/task/traversal_data.py
+++ b/tests/ops/task/traversal_data.py
@@ -156,7 +156,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "address",
                 "fields": [
-                    {"name": "_id"},
+                    {"name": "_id", "fides_meta": {"primary_key": True}},
                     {
                         "name": "id",
                         "fides_meta": {
@@ -178,7 +178,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "orders",
                 "fields": [
-                    {"name": "_id"},
+                    {"name": "_id", "fides_meta": {"primary_key": True}},
                     {
                         "name": "customer_id",
                         "fides_meta": {
@@ -200,6 +200,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
+                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -228,6 +229,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
+                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -237,19 +239,27 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "comment",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "message",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "chat_name",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "ccn",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                         ],
                     },
@@ -258,7 +268,12 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
             {
                 "name": "customer_details",
                 "fields": [
-                    {"name": "_id", "fides_meta": {"primary_key": True}},
+                    {
+                        "name": "_id",
+                        "fides_meta": {
+                            "primary_key": True,
+                        },
+                    },
                     {
                         "name": "birthday",
                         "fides_meta": {"data_type": "string"},
@@ -304,15 +319,21 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "name",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "relationship",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "phone",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                         ],
                     },
@@ -330,11 +351,15 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                         "fields": [
                             {
                                 "name": "employer",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "position",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "direct_reports",
@@ -350,6 +375,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
+                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -366,11 +392,15 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             },
                             {
                                 "name": "phone",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                             {
                                 "name": "internal_customer_id",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                         ],
                     },
@@ -394,6 +424,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "_id",
                         "fides_meta": {
+                            "primary_key": True,
                             "data_type": "object_id",
                         },
                     },
@@ -407,6 +438,7 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                     {
                         "name": "id",
                         "fides_meta": {
+                            "primary_key": True,
                             "references": [
                                 {
                                     "dataset": mongo_db_name,
@@ -427,7 +459,10 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {"data_type": "object_id"},
+                        "fides_meta": {
+                            "primary_key": True,
+                            "data_type": "object_id",
+                        },
                     },
                     {
                         "name": "date",
@@ -454,7 +489,9 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                             },
                             {
                                 "name": "full_name",
-                                "fides_meta": {"data_type": "string"},
+                                "fides_meta": {
+                                    "data_type": "string",
+                                },
                             },
                         ],
                     },
@@ -473,7 +510,10 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {"data_type": "object_id"},
+                        "fides_meta": {
+                            "primary_key": True,
+                            "data_type": "object_id",
+                        },
                     },
                     {
                         "name": "customer_identifiers",
@@ -520,7 +560,10 @@ def mongo_dataset_dict(mongo_db_name: str, postgres_db_name: str) -> GraphDatase
                 "fields": [
                     {
                         "name": "_id",
-                        "fides_meta": {"data_type": "object_id"},
+                        "fides_meta": {
+                            "primary_key": True,
+                            "data_type": "object_id",
+                        },
                     },
                     {
                         "name": "owner",