Skip to content

Commit

Permalink
make primary key (id,entityTypeID)
Browse files Browse the repository at this point in the history
  • Loading branch information
MohitYadav-codes committed Jun 27, 2024
1 parent 1c44e46 commit 67212c2
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 46 deletions.
68 changes: 36 additions & 32 deletions akm_tools/validation/data_context_validators.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Dict, List, Any
from typing import Dict, List, Any, Tuple
from .global_debug_config import GlobalDebugConfig
from .custom_exceptions import IDConflictException, BaseInstanceOverwiteException, InvalidReferentIDException

Expand All @@ -21,15 +21,15 @@ def create_instance_dict(self, all_data):
# Populate the instance_dict dictionary
instance_dict = {}
for instance in all_data:
if "id" in instance:
instance_id = instance["id"]
if instance_id not in instance_dict:
# Initialize the ID key with a list containing the current instance
instance_dict[instance_id] = {"count": 1, "instances": [instance]}
if "id" in instance and "entityTypeID" in instance:
instance_key = (instance["id"], instance["entityTypeID"])
if instance_key not in instance_dict:
# Initialize the composite key with a list containing the current instance
instance_dict[instance_key] = {"count": 1, "instances": [instance]}
else:
# Append the current instance to the list and increment the count
instance_dict[instance_id]["instances"].append(instance)
instance_dict[instance_id]["count"] += 1
instance_dict[instance_key]["instances"].append(instance)
instance_dict[instance_key]["count"] += 1
return instance_dict

def _handle_error(self, exception_type, *args):
Expand All @@ -42,7 +42,7 @@ def _handle_error(self, exception_type, *args):

class ExtendedInstanceContentValidator(AllDataContextValidators):
"""
For Instances with duplicate "id", where one extends the other,
For Instances with duplicate ("id", "entityTypeID"), where one extends the other,
check if the extended Instance does not overwrite content of base instance
"""

Expand All @@ -53,12 +53,12 @@ def validate_data_contexts(self, all_data: List[Dict[str, Any]]):
valid_data = []
instance_dict = self.__class__.create_instance_dict(all_data)

# Handle instances with same ids and prepare valid_data
for instance_id, instance_content in instance_dict.items():
# Handle instances with same composite keys and prepare valid_data
for instance_key, instance_content in instance_dict.items():
if len(instance_content) > 2:
self._handle_multiple_id_conflicts(instance_content)
if instance_content["count"] == 2:
# check if the insances are not overriding , but only extending existing data.
# check if the instances are not overriding, but only extending existing data.
is_valid_extension, base_instance, extended_instance = self.__class__.check_data_is_extended_not_overwritten(
instance_content["instances"]
)
Expand Down Expand Up @@ -100,44 +100,48 @@ def __init__(self):
self.id_set = set()

def validate_data_contexts(self, all_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Return the instances whose references all resolve to valid instances.

    Instances are indexed by the composite key ``(id, entityTypeID)``.
    An instance that lacks either field is not indexed and is therefore
    never part of the result. If several instances share a composite key,
    the last one in ``all_data`` is the one that is kept.

    A property value counts as a reference when it is a dict that contains
    both ``referentID`` and ``referentEntityTypeID``. Only top-level
    property values are inspected.

    An instance is valid when every reference it holds points to an
    indexed instance that is itself valid. Instances that take part in a
    circular chain of references are treated as invalid.

    Args:
        all_data: The instances to validate.

    Returns:
        The valid instances, in the order their composite key first
        appeared in ``all_data``.
    """
    # Map composite keys to data instances.
    key_to_instance = {
        (instance["id"], instance["entityTypeID"]): instance
        for instance in all_data
        if "id" in instance and "entityTypeID" in instance
    }

    # None  -> not checked yet
    # False -> invalid, or currently being checked (cycle guard)
    # True  -> valid
    key_to_validity = {key: None for key in key_to_instance}

    def is_valid(key: Tuple[Any, Any]) -> bool:
        # A composite key that is not indexed cannot be a valid referent.
        if key not in key_to_instance:
            return False

        # Reuse an earlier verdict (this is also what breaks cycles).
        if key_to_validity[key] is not None:
            return key_to_validity[key]

        # Provisionally mark as invalid so that a reference leading back
        # to this key while it is being checked resolves to False.
        key_to_validity[key] = False

        for value in key_to_instance[key].values():
            if (
                isinstance(value, dict)
                and "referentEntityTypeID" in value  # hard dependency on the schema for akm.Reference
                and "referentID" in value
            ):
                referent_key = (value["referentID"], value["referentEntityTypeID"])
                if not is_valid(referent_key):
                    return False

        # Every reference resolved to a valid instance.
        key_to_validity[key] = True
        return True

    # Determine the validity of every indexed instance.
    for key in key_to_instance:
        is_valid(key)

    # Collect the valid data.
    return [instance for key, instance in key_to_instance.items() if key_to_validity[key]]

Expand Down
15 changes: 9 additions & 6 deletions tests/test_AllDataContextValidators.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import pytest
from akm_tools.validation.data_context_validators import AllDataContextValidators


def test_create_instance_dict():
    """
    Test case for the create_instance_dict method of the AllDataContextValidators class.
    This test checks if the create_instance_dict method correctly creates a dictionary
    that maps instance composite keys (id, entityTypeID) to a dictionary containing the count
    of instances with that composite key and a list of the instances themselves.
    """
    all_data = [
        {"id": "1a", "entityTypeID": "type1", "name": "test1"},
        {"id": "2b", "entityTypeID": "type2", "name": "test2"},
        {"id": "1a", "entityTypeID": "type1", "name": "test3"},
    ]
    instance_dict = AllDataContextValidators.create_instance_dict(all_data)
    expected_dict = {
        # Two instances share the composite key ("1a", "type1").
        ("1a", "type1"): {
            "count": 2,
            "instances": [
                {"id": "1a", "entityTypeID": "type1", "name": "test1"},
                {"id": "1a", "entityTypeID": "type1", "name": "test3"},
            ],
        },
        ("2b", "type2"): {
            "count": 1,
            "instances": [{"id": "2b", "entityTypeID": "type2", "name": "test2"}],
        },
    }
    assert instance_dict == expected_dict, "The instance dictionary was not created correctly."
9 changes: 5 additions & 4 deletions tests/test_CrossReferenceValidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,22 @@ def circular_references():
def test_invalid_chain_of_references(invalid_chain_of_references):
    """The validator is expected to return no instances for the `invalid_chain_of_references` fixture."""
    validator = CrossReferenceValidator()
    valid_data = validator.validate_data_contexts(invalid_chain_of_references)
    assert len(valid_data) == 0, "The validator should return an empty list for invalid references"


def test_valid_chain_of_reference(valid_chain_of_reference):
    """The validator is expected to return three instances for the `valid_chain_of_reference` fixture."""
    validator = CrossReferenceValidator()
    valid_data = validator.validate_data_contexts(valid_chain_of_reference)
    assert len(valid_data) == 3, "The validator should return all instances for valid cross-references"


def test_reference_not_present(reference_not_present):
    """The validator is expected to return no instances for the `reference_not_present` fixture."""
    validator = CrossReferenceValidator()
    valid_data = validator.validate_data_contexts(reference_not_present)
    assert len(valid_data) == 0, "The validator should return an empty list for missing references"


def test_circular_references(circular_references):
    """The validator is expected to return no instances for the `circular_references` fixture."""
    validator = CrossReferenceValidator()
    valid_data = validator.validate_data_contexts(circular_references)
    assert len(valid_data) == 0, "The validator should return an empty list for circular references"
10 changes: 6 additions & 4 deletions tests/test_ExtendedInstanceContentValidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,32 @@ def test_extended_data_is_used(overlay_existing_data_with_addional_properties):
valid_data = validator.validate_data_contexts(overlay_existing_data_with_addional_properties)
assert valid_data[0] == {
"id": "unique_id1",
"entityTypeID": "type3",
"entityType": "ObjectType3",
"extended_property": "any string",
}
assert len(validator.warning_messages) == 1
assert len(validator.error_messages) == 0


def test_overriding_base_data_not_allowed(overwrite_existing_data):
    """
    The validator is expected to return the instance below as its first result
    and to record one error and no warnings.
    """
    # NOTE(review): relies on the fixture being named `overwrite_existing_data`;
    # confirm the fixture definition was renamed from `ovewrite_existing_data` too.
    validator = ExtendedInstanceContentValidator()
    valid_data = validator.validate_data_contexts(overwrite_existing_data)
    assert valid_data[0] == {
        "id": "unique_id1",
        "entityTypeID": "type3",
        "description": "description for unique_id1",
        "entityType": "ObjectType3",
    }
    assert len(validator.warning_messages) == 0
    assert len(validator.error_messages) == 1


def test_overriding_base_data_in_debug_mode_raises_exception(ovewrite_existing_data):
def test_overriding_base_data_in_debug_mode_raises_exception(overwrite_existing_data):
GlobalDebugConfig.set_debug_mode()
validator = ExtendedInstanceContentValidator()
try:
valid_data = validator.validate_data_contexts(ovewrite_existing_data)
valid_data = validator.validate_data_contexts(overwrite_existing_data)
pytest.fail("BaseInstanceOverwiteException was not raised when expected.")
except BaseInstanceOverwiteException as e:
assert True
Expand Down

0 comments on commit 67212c2

Please sign in to comment.