From 6f2d910d11e2cde99d015584e0dae70b4e5d6159 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Mon, 11 Nov 2024 14:53:58 -0500 Subject: [PATCH 01/17] Adding external process to creation action #523 --- src/schema/schema_constants.py | 2 +- src/schema/schema_neo4j_queries.py | 33 ++++++++++++++++++++++++++++++ src/schema/schema_validators.py | 7 +++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py index d1d661d..be14a0a 100644 --- a/src/schema/schema_constants.py +++ b/src/schema/schema_constants.py @@ -23,7 +23,7 @@ class SchemaConstants(object): DOI_BASE_URL = 'https://doi.org/' - ALLOWED_SINGLE_CREATION_ACTIONS = ['central process', 'lab process'] + ALLOWED_SINGLE_CREATION_ACTIONS = ['central process', 'lab process', 'external process'] ALLOWED_MULTI_CREATION_ACTIONS = ['multi-assay split'] ALLOWED_DATASET_STATUSES = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete'] diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index d9cc8e8..8d04045 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -107,6 +107,39 @@ def get_dataset_direct_descendants(neo4j_driver, uuid, property_key=None, match_ return results + +""" +Get the uuids for each entity in a list that doesn't belong to a certain entity type. 
Uuids are ordered by type + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +direct_ancestor_uuids : list + List of the uuids to be filtered +entity_type : string + The entity to be excluded + +Returns +------- +dict + A dictionary of entity uuids that don't pass the filter, grouped by entity_type +""" + + +def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type): + query = (f"MATCH (e:Entity) " + f"WHERE e.uuid in {direct_ancestor_uuids} AND toLower(e.entity_type) <> '{entity_type.lower()}' " + f"RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids") + logger.info("======filter_ancestors_by_type======") + logger.info(query) + + with neo4j_driver.session() as session: + records = session.run(query).data() + + return records if records else None + + """ Get the origin (organ) sample ancestor of a given entity by uuid diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index abfe6f8..36c4afe 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -497,6 +497,13 @@ def validate_creation_action(property_key, normalized_entity_type, request, exis if creation_action == '': raise ValueError(f"The property {property_key} cannot be empty, when specified.") + if creation_action == 'external process': + direct_ancestor_uuids = new_data_dict.get('direct_ancestor_uuids') + entity_types_dict = schema_neo4j_queries.filter_ancestors_by_type(schema_manager.get_neo4j_driver_instance(), direct_ancestor_uuids, "dataset") + if entity_types_dict: + raise ValueError("If 'creation_action' field is given, all ancestor uuids must belong to datasets. " + f"The following entities belong to non-dataset entities: {entity_types_dict}") + """ Validate the provided value of the activity creation action before updating direct ancestors. 
Certain values prohibited From b88a5afb026c46ccd19ce91fdb7d1c11bafda819 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Mon, 11 Nov 2024 14:57:03 -0500 Subject: [PATCH 02/17] Adding epicollection to the provenance schema #523 --- src/schema/provenance_schema.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 17e1188..62ee500 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -1264,3 +1264,14 @@ ENTITIES: type: boolean description: 'Determines if the datasets of an upload are all published.' on_index_trigger: get_has_all_published_datasets + + ############################################# EPICollection ############################################# + Epicollection: + # This superclass property is optional + superclass: Collection + # EPICollection can be neither a derivation source nor a derivation target + derivation: + source: false + target: false + properties: + <<: *shared_collection_properties From 8b5e5efadeb33d05dad2968dee2dec167273eccf Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Mon, 11 Nov 2024 15:32:29 -0500 Subject: [PATCH 03/17] Adding checks for epicollections in endpoints #523 This adds checks for the epicollection entity type in the create entity, update entity, and doi redirect endpoints. 
--- src/app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/app.py b/src/app.py index 5ccb401..b9b6d1a 100644 --- a/src/app.py +++ b/src/app.py @@ -1153,7 +1153,7 @@ def check_previous_revision(previous_revision_uuid): 'next_revision_uuids', 'previous_revision_uuids' ] - elif normalized_entity_type in ['Upload', 'Collection']: + elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']: properties_to_skip = [ 'datasets', 'entities' @@ -1542,7 +1542,7 @@ def update_entity(id: str, user_token: str, json_data_dict: dict): 'next_revision_uuids', 'previous_revision_uuids' ] - elif normalized_entity_type in ['Upload', 'Collection']: + elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']: properties_to_skip = [ 'datasets', 'entities' @@ -2441,7 +2441,7 @@ def doi_redirect(id): entity_type = entity_dict['entity_type'] # Only for collection - if entity_type not in ['Collection', 'Dataset', 'Publication']: + if entity_type not in ['Collection', 'Epicollection', 'Dataset', 'Publication']: abort_bad_req("The target entity of the specified id must be a Collection or Dataset or Publication") uuid = entity_dict['uuid'] From 02f246ae335169add9bac2590bb58d1c42d28fdc Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Tue, 12 Nov 2024 11:09:56 -0500 Subject: [PATCH 04/17] Adding shared_collection_properties to schema #523 --- src/schema/provenance_schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 62ee500..cfb5787 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -262,7 +262,7 @@ ENTITIES: source: false target: false # Collection doesn't actually need data_access_level property - properties: + properties: &shared_collection_properties <<: *shared_properties <<: *shared_entity_properties # Because Collection-specific validation is needed for some From 
ad9e523153f121d50d38b8e07ee1cf47f65e569b Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Tue, 12 Nov 2024 15:20:12 -0500 Subject: [PATCH 05/17] Add validator for collection entities #523 This commit adds a validator for collection and epicollection entities. The validator checks if each given entity_uuid exists and (for epicollections) is a dataset. --- src/schema/schema_validators.py | 65 +++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 36c4afe..fcb713c 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -63,6 +63,71 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request raise ValueError(f"The {property_key} field must only contain unique items") +""" +Validate every entity exists and (optionally) is a Dataset + +Parameters +---------- +property_key : str + The target property key +normalized_type : str + Submission +request: Flask request object + The instance of Flask request passed in from application request +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + The json data in request body, already after the regular validations +""" +def collection_entities_are_existing_entities(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): + # `entity_uuids` is required for creating a Collection + # Verify each UUID specified exists in the uuid-api, exists in Neo4j, and (optionally) is for a Dataset before + # proceeding with creation of Collection. + bad_entities_uuids = [] + for entity_uuid in new_data_dict['entity_uuids']: + try: + # The following code duplicates some functionality existing in app.py, in + # query_target_entity(), which also deals with caching. In the future, the + # validation logic shared by this file and app.py should become a utility + # module, shared by validators as well as app.py. 
But for now, the code + # is repeated for the following. + + # Get cached ids if exist otherwise retrieve from UUID-API. Expect an + # Exception to be raised if not found. + entity_detail = schema_manager.get_sennet_ids(id=entity_uuid) + entity_uuid = entity_detail['uuid'] + + # If the uuid exists per the uuid-api, make sure it also exists as a Neo4j entity. + entity_dict = schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance(), entity_uuid) + + # If dataset_uuid is not found in Neo4j fail the validation. + if not entity_dict: + logger.info(f"Request for {entity_uuid} inclusion in Collection, " + "but not found in Neo4j.") + bad_entities_uuids.append(entity_uuid) + continue + + # Collections can have other entity types besides Dataset, so skip the Dataset check + if normalized_entity_type == 'Collection': + continue + + if entity_dict['entity_type'] != 'Dataset': + logger.info(f"Request for {entity_uuid} inclusion in Collection, " + f"but entity_type={entity_dict['entity_type']}, not Dataset.") + bad_entities_uuids.append(entity_uuid) + except Exception: + # If the entity_uuid is not found, fail the validation. + logger.info(f"Request for {entity_uuid} inclusion in Collection " + "failed uuid-api retrieval.") + bad_entities_uuids.append(entity_uuid) + + # If any uuids in the request entities_uuids are not for an existing Dataset entity which + # exists in uuid-api and Neo4j, raise an Exception so the validation fails and the + # operation can be rejected. 
+ if bad_entities_uuids: + raise ValueError(f"Unable to find Datasets for {bad_entities_uuids}.") + + """ If an entity has a DOI, do not allow it to be updated """ From ccb6eb3d96fcdafc517ef4080e08989e035fe5a1 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Wed, 13 Nov 2024 09:26:10 -0500 Subject: [PATCH 06/17] Fixing external process creation action check #523 --- src/schema/schema_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index fcb713c..dbe9bb3 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -562,7 +562,7 @@ def validate_creation_action(property_key, normalized_entity_type, request, exis if creation_action == '': raise ValueError(f"The property {property_key} cannot be empty, when specified.") - if creation_action == 'external process': + if creation_action.lower() == 'external process': direct_ancestor_uuids = new_data_dict.get('direct_ancestor_uuids') entity_types_dict = schema_neo4j_queries.filter_ancestors_by_type(schema_manager.get_neo4j_driver_instance(), direct_ancestor_uuids, "dataset") if entity_types_dict: From 2471a5fa23be1f940afaf0889fc6130746d8b7ed Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Wed, 13 Nov 2024 10:44:32 -0500 Subject: [PATCH 07/17] Fixing external process creation action check #523 This commit clears up the logic in the validate_creation_action. 
--- src/schema/schema_validators.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index dbe9bb3..43c80fd 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -555,18 +555,19 @@ def validate_publication_date(property_key, normalized_entity_type, request, exi def validate_creation_action(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): - accepted_creation_action_values = SchemaConstants.ALLOWED_SINGLE_CREATION_ACTIONS - creation_action = new_data_dict.get(property_key) - if creation_action and creation_action.lower() not in accepted_creation_action_values: - raise ValueError("Invalid {} value. Accepted values are: {}".format(property_key, ", ".join(accepted_creation_action_values))) + creation_action = new_data_dict[property_key].lower() # raise key error if not found if creation_action == '': raise ValueError(f"The property {property_key} cannot be empty, when specified.") - if creation_action.lower() == 'external process': + accepted_creation_action_values = SchemaConstants.ALLOWED_SINGLE_CREATION_ACTIONS + if creation_action not in accepted_creation_action_values: + raise ValueError("Invalid {} value. Accepted values are: {}".format(property_key, ", ".join(accepted_creation_action_values))) + + if creation_action == 'external process': direct_ancestor_uuids = new_data_dict.get('direct_ancestor_uuids') entity_types_dict = schema_neo4j_queries.filter_ancestors_by_type(schema_manager.get_neo4j_driver_instance(), direct_ancestor_uuids, "dataset") if entity_types_dict: - raise ValueError("If 'creation_action' field is given, all ancestor uuids must belong to datasets. " + raise ValueError("If 'creation_action' field is given and is 'external process', all ancestor uuids must belong to datasets. 
" f"The following entities belong to non-dataset entities: {entity_types_dict}") From 28958a3beb2ff45b875f3e12289eefca92425046 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Wed, 13 Nov 2024 10:47:20 -0500 Subject: [PATCH 08/17] Updating schema validator unit tests #523 --- test/test_schema_validators.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/test_schema_validators.py b/test/test_schema_validators.py index 9d4b062..e9d3c44 100644 --- a/test/test_schema_validators.py +++ b/test/test_schema_validators.py @@ -1,7 +1,3 @@ -import test - -test.cwd_to_src() - import pytest from schema import schema_validators @@ -12,7 +8,7 @@ ('central process', True), ('Lab Process', True), ('lab process', True), - (None, True), + (None, False), ('Multi-Assay Split', False), ('multi-assay split', False), ('', False), @@ -38,7 +34,7 @@ def test_validate_single_creation_action(creation_action, succeeds): ) else: # Test invalid creation action - with pytest.raises(ValueError): + with pytest.raises((ValueError, KeyError)): schema_validators.validate_creation_action( property_key, normalized_entity_type, request, existing_data_dict, new_data_dict From 0fd0e67cdff8d0b6856115a7ef5a928efc060b3f Mon Sep 17 00:00:00 2001 From: maxsibilla Date: Wed, 13 Nov 2024 11:43:31 -0500 Subject: [PATCH 09/17] Updating link_collection_to_entities to properly remove all entity types from existing collection before updating based on entity_uuids list --- src/schema/schema_neo4j_queries.py | 2 +- src/schema/schema_triggers.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 8d04045..c4ae23f 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -1403,7 +1403,7 @@ def _delete_publication_associated_collection_linkages_tx(tx, uuid): def _delete_collection_linkages_tx(tx, uuid): - query = (f"MATCH 
(d:Dataset)-[in:IN_COLLECTION]->(c:Collection)" + query = (f"MATCH (e:Entity)-[in:IN_COLLECTION]->(c:Collection)" f" WHERE c.uuid = '{uuid}' " f" DELETE in") diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 4ac7ad7..c62b6dd 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -927,6 +927,11 @@ def link_collection_to_entities(property_key, normalized_type, user_token, exist schema_neo4j_queries.link_collection_to_entities(neo4j_driver=schema_manager.get_neo4j_driver_instance(), collection_uuid=existing_data_dict['uuid'], entities_uuid_list=entity_uuids) + + # Delete the cache of each associated entity and the collection itself if any cache exists, + # because the `Dataset.collections` and `Collection.datasets` fields change when the links are rebuilt + uuids_list = [existing_data_dict['uuid']] + entity_uuids + schema_manager.delete_memcached_cache(uuids_list) except TransactionError: # No need to log raise From fe7b57c536d6732a8a6f32dbff64a89d184c549a Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Thu, 14 Nov 2024 15:31:54 -0500 Subject: [PATCH 10/17] Checking entities visibility for collections This commit removes the requirement that the entity is a dataset. In SenNet, any entity type can be associated with a collection. A check was added for the entities visibility to see if a token is required. 
--- src/app.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/app.py b/src/app.py index 5ccb401..2de5256 100644 --- a/src/app.py +++ b/src/app.py @@ -4618,14 +4618,13 @@ def get_collections(id): # Get the entity dict from cache if exists # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists entity_dict = query_target_entity(id) - normalized_entity_type = entity_dict['entity_type'] uuid = entity_dict['uuid'] public_entity = True - if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): - abort_bad_req(f"Unsupported entity type of id {id}: {normalized_entity_type}") + entity_scope = _get_entity_visibility(normalized_entity_type=entity_dict['entity_type'], + entity_dict=entity_dict) - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_scope == DataVisibilityEnum.NON_PUBLIC: # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required=True) public_entity = False From d8246916a23a53c753d4c168daa542c003f32a48 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Fri, 15 Nov 2024 10:02:41 -0500 Subject: [PATCH 11/17] Fixing reference to NONPUBLIC --- src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index 2de5256..5720b7d 100644 --- a/src/app.py +++ b/src/app.py @@ -4624,7 +4624,7 @@ def get_collections(id): entity_scope = _get_entity_visibility(normalized_entity_type=entity_dict['entity_type'], entity_dict=entity_dict) - if entity_scope == DataVisibilityEnum.NON_PUBLIC: + if entity_scope == DataVisibilityEnum.NONPUBLIC: # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required=True) public_entity = False From ae42986fbaa1c2c959df9dcebfb407fe83838a25 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Fri, 15 Nov 2024 10:07:02 -0500 Subject: [PATCH 12/17] Removing dataset constraint from 
get collections --- src/schema/schema_neo4j_queries.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index d9cc8e8..9bd37b9 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -1996,7 +1996,8 @@ def get_tuplets(neo4j_driver, uuid, property_key=None): """ -Get all collections by uuid +Get all collections for a given entity uuid + Parameters ---------- neo4j_driver : neo4j.Driver object @@ -2005,6 +2006,7 @@ def get_tuplets(neo4j_driver, uuid, property_key=None): The uuid of target entity property_key : str A target property key for result filtering + Returns ------- list @@ -2014,14 +2016,14 @@ def get_collections(neo4j_driver, uuid, property_key = None): results = [] if property_key: - query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(ds:Dataset) " - f"WHERE ds.uuid='{uuid}' " + query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(e:Entity) " + f"WHERE e.uuid='{uuid}' " # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN apoc.coll.toSet(COLLECT(c.{property_key})) AS {record_field_name}") else: - query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(ds:Dataset) " - f"WHERE ds.uuid='{uuid}' " + query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(e:Entity) " + f"WHERE e.uuid='{uuid}' " # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}") From 8b81e57fbe2819f2714749cf20f175ad4043afb0 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Fri, 15 Nov 2024 11:39:41 -0500 Subject: [PATCH 13/17] Fixing issue when updating epicollection #523 --- src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index b9b6d1a..2615364 100644 --- a/src/app.py +++ b/src/app.py @@ -1504,7 +1504,7 @@ def update_entity(id: str, user_token: str, json_data_dict: dict): if 
has_dataset_uuids_to_link or has_updated_status: after_update(normalized_entity_type, user_token, merged_updated_dict) - elif normalized_entity_type == 'Collection': + elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'): entity_visibility = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=entity_dict) # Prohibit update of an existing Collection if it meets criteria of being visible to public e.g. has DOI. From 95e0766594c43fa0adfa6998fa0ff33af0f16640 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Fri, 15 Nov 2024 11:42:08 -0500 Subject: [PATCH 14/17] Adding epicollections to collection endpoint #523 This commit updates the GET collections//entities endpoint to support epicollections --- src/app.py | 3 ++- src/schema/schema_neo4j_queries.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/app.py b/src/app.py index 2615364..e8c831c 100644 --- a/src/app.py +++ b/src/app.py @@ -4922,7 +4922,8 @@ def get_entities_for_collection(id: str): # Verify that the entity is a collection entity_dict = query_target_entity(id) entity_type = entity_dict["entity_type"] - if not equals(entity_type, "Collection"): + + if not schema_manager.entity_type_instanceof(entity_type, "Collection"): abort_bad_req(f"{entity_type.title()} with id {id} is not a collection") # Determine if the entity is publicly visible base on its data, only. 
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index c4ae23f..e3a0b72 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -801,7 +801,7 @@ def get_dataset_upload(neo4j_driver, uuid, property_key=None): def get_collection_entities(neo4j_driver, uuid): results = [] - query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) " + query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection|Epicollection) " f"WHERE c.uuid = '{uuid}' " f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}") From 85bf5206ab2e2631219896fe4be9fcd6d16c916f Mon Sep 17 00:00:00 2001 From: maxsibilla Date: Fri, 15 Nov 2024 13:53:12 -0500 Subject: [PATCH 15/17] Adding new constraint for Epicollection --- src/lib/constraints/README.md | 2 +- src/lib/constraints/__init__.py | 9 ++++++++- src/lib/constraints/epicollection.py | 21 +++++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 src/lib/constraints/epicollection.py diff --git a/src/lib/constraints/README.md b/src/lib/constraints/README.md index f2db7ad..1c702ba 100644 --- a/src/lib/constraints/README.md +++ b/src/lib/constraints/README.md @@ -181,7 +181,7 @@ You can reverse the order and the `response.description` will give you valid anc ``` ### Getting the descendants given a particular ancestor: -Remove the `match` param from the request url: +Remove the `match` param from the request url:e #### Request 2a: `/constraints` ``` diff --git a/src/lib/constraints/__init__.py b/src/lib/constraints/__init__.py index 66202ca..04b12bc 100644 --- a/src/lib/constraints/__init__.py +++ b/src/lib/constraints/__init__.py @@ -3,6 +3,7 @@ from lib.constraints.sample import * from lib.constraints.dataset import * from lib.constraints.publication import * +from lib.constraints.epicollection import * from deepdiff import DeepDiff from atlas_consortia_commons.rest import rest_ok, rest_response, StatusCodes, rest_bad_req @@ -21,10 +22,15 @@ 
def build_sample_constraints(entity) -> list: def build_dataset_constraints(entity) -> list: return build_all_dataset_constraints(entity) + def build_publication_constraints(entity) -> list: return build_all_publication_constraints(entity) +def build_epicollection_constraints(entity) -> list: + return build_all_epicollection_constraints(entity) + + def determine_constraint_from_entity(constraint_unit, use_case=None) -> dict: entity_type = constraint_unit.get('entity_type', '') entity_type = entity_type.lower() @@ -32,7 +38,8 @@ def determine_constraint_from_entity(constraint_unit, use_case=None) -> dict: error = None constraints = [] entities = Ontology.ops(as_arr=True, cb=enum_val_lower).entities() - + # Need to manually add Epicollection + entities.append('epicollection') if entity_type not in entities: error = f"No `entity_type` found with value `{entity_type}`" else: diff --git a/src/lib/constraints/epicollection.py b/src/lib/constraints/epicollection.py new file mode 100644 index 0000000..04a9375 --- /dev/null +++ b/src/lib/constraints/epicollection.py @@ -0,0 +1,21 @@ +from lib.constraints.base import build_constraint, build_constraint_unit, build_search_constraint_unit +from lib.ontology import Ontology + + +# can be the descendant of / ---> +def build_all_epicollection_constraints(entity): + + ancestor = build_constraint_unit(Ontology.ops().entities().DATASET) + descendant = build_constraint_unit(entity) + + return [ + build_constraint(ancestor, [descendant]) + ] + +def build_epicollection_search_constraints(entity): + descendant = build_constraint_unit(entity) + ancestor = build_search_constraint_unit('entity_type.keyword', Ontology.ops().entities().DATASET) + + return [ + build_constraint([ancestor], [descendant]) + ] From f2825b2cdb449512775b7c8cec85aa3081dd00f6 Mon Sep 17 00:00:00 2001 From: maxsibilla Date: Fri, 15 Nov 2024 13:54:01 -0500 Subject: [PATCH 16/17] Reverting README edit --- src/lib/constraints/README.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/lib/constraints/README.md b/src/lib/constraints/README.md index 1c702ba..f2db7ad 100644 --- a/src/lib/constraints/README.md +++ b/src/lib/constraints/README.md @@ -181,7 +181,7 @@ You can reverse the order and the `response.description` will give you valid anc ``` ### Getting the descendants given a particular ancestor: -Remove the `match` param from the request url:e +Remove the `match` param from the request url: #### Request 2a: `/constraints` ``` From ad83a0d8b1a711f8abaf67f8c4e1b9cfb34a46d3 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Fri, 15 Nov 2024 15:51:05 -0500 Subject: [PATCH 17/17] Adding collections to source and samples --- src/app.py | 2 +- src/schema/provenance_schema.yaml | 18 ++++++++++++++++-- src/schema/schema_neo4j_queries.py | 8 ++++---- src/schema/schema_triggers.py | 8 ++++---- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/app.py b/src/app.py index 5720b7d..02c9b8b 100644 --- a/src/app.py +++ b/src/app.py @@ -5604,7 +5604,7 @@ def delete_cache(id): upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') # If the target entity is Datasets/Publication, delete the associated Collections cache, Upload cache - collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') + collection_uuids = schema_neo4j_queries.get_entity_collections(neo4j_driver_instance, entity_uuid , 'uuid') collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 17e1188..b22bc01 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -444,8 +444,8 @@ ENTITIES: transient: true generated: true description: "A list of collections that this 
dataset belongs to. Will be returned in response" - on_read_trigger: get_dataset_collections - on_index_trigger: get_dataset_collections + on_read_trigger: get_entity_collections + on_index_trigger: get_entity_collections # No on_index_trigger to include collections in the OpenSearch document for a Dataset (from HM, we might need this for SN) upload: type: json_string # dict @@ -888,6 +888,13 @@ ENTITIES: type: string required_on_create: true description: "A high level description of where this source originates from." + collections: + type: list + transient: true + generated: true + description: "A list of collections that this source belongs to. Will be returned in response" + on_read_trigger: get_entity_collections + on_index_trigger: get_entity_collections ############################################# Sample ############################################# @@ -1149,6 +1156,13 @@ ENTITIES: immutable: true description: "A boolean indicating if the Sample contains any Datasets in its descendants." on_index_trigger: get_contains_data + collections: + type: list + transient: true + generated: true + description: "A list of collections that this sample belongs to. Will be returned in response" + on_read_trigger: get_entity_collections + on_index_trigger: get_entity_collections ############################################# Upload ############################################# # Eventually we might allow folks to bulk register Samples or Sources through the same mechanism. 
diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 9bd37b9..f18d2f6 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -662,14 +662,14 @@ def get_next_revision_uuid(neo4j_driver, uuid): """ -Get a list of associated collection uuids for a given dataset +Get a list of associated collection uuids for a given entity Parameters ---------- neo4j_driver : neo4j.Driver object The neo4j database connection pool uuid : str - The uuid of dataset + The uuid of entity property_key : str A target property key for result filtering @@ -680,7 +680,7 @@ def get_next_revision_uuid(neo4j_driver, uuid): """ -def get_dataset_collections(neo4j_driver, uuid, property_key=None): +def get_entity_collections(neo4j_driver, uuid, property_key=None): results = [] if property_key: @@ -692,7 +692,7 @@ def get_dataset_collections(neo4j_driver, uuid, property_key=None): f"WHERE e.uuid = '{uuid}' " f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}") - logger.info("======get_dataset_collections() query======") + logger.info("======get_entity_collections() query======") logger.info(query) with neo4j_driver.session() as session: diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 4ac7ad7..8e67c23 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -799,7 +799,7 @@ def set_dataset_status_new(property_key, normalized_type, user_token, existing_d return property_key, 'New' -def get_dataset_collections(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_entity_collections(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): """Trigger event method of getting a list of collections for this new Dataset. 
Parameters @@ -825,15 +825,15 @@ def get_dataset_collections(property_key, normalized_type, user_token, existing_ if 'uuid' not in existing_data_dict: msg = create_trigger_error_msg( - "Missing 'uuid' key in 'existing_data_dict' during calling 'get_dataset_collections()' trigger method.", + "Missing 'uuid' key in 'existing_data_dict' during calling 'get_entity_collections()' trigger method.", existing_data_dict, new_data_dict ) raise KeyError(msg) # No property key needs to filter the result # Get back the list of collection dicts - collections_list = schema_neo4j_queries.get_dataset_collections(schema_manager.get_neo4j_driver_instance(), - existing_data_dict['uuid']) + collections_list = schema_neo4j_queries.get_entity_collections(schema_manager.get_neo4j_driver_instance(), + existing_data_dict['uuid']) if collections_list: # Exclude datasets from each resulting collection # We don't want to show too much nested information