Tjmadonna/523 epicollections #526

Merged
merged 15 commits on Nov 21, 2024
11 changes: 6 additions & 5 deletions src/app.py
@@ -1153,7 +1153,7 @@ def check_previous_revision(previous_revision_uuid):
'next_revision_uuids',
'previous_revision_uuids'
]
elif normalized_entity_type in ['Upload', 'Collection']:
elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']:
properties_to_skip = [
'datasets',
'entities'
@@ -1504,7 +1504,7 @@ def update_entity(id: str, user_token: str, json_data_dict: dict):
if has_dataset_uuids_to_link or has_updated_status:
after_update(normalized_entity_type, user_token, merged_updated_dict)

elif normalized_entity_type == 'Collection':
elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'):
entity_visibility = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=entity_dict)

# Prohibit update of an existing Collection if it meets criteria of being visible to public e.g. has DOI.
@@ -1542,7 +1542,7 @@ def update_entity(id: str, user_token: str, json_data_dict: dict):
'next_revision_uuids',
'previous_revision_uuids'
]
elif normalized_entity_type in ['Upload', 'Collection']:
elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']:
properties_to_skip = [
'datasets',
'entities'
@@ -2441,7 +2441,7 @@ def doi_redirect(id):
entity_type = entity_dict['entity_type']

# Only for collection
if entity_type not in ['Collection', 'Dataset', 'Publication']:
if entity_type not in ['Collection', 'Epicollection', 'Dataset', 'Publication']:
abort_bad_req("The target entity of the specified id must be a Collection or Dataset or Publication")

uuid = entity_dict['uuid']
@@ -4922,7 +4922,8 @@ def get_entities_for_collection(id: str):
# Verify that the entity is a collection
entity_dict = query_target_entity(id)
entity_type = entity_dict["entity_type"]
if not equals(entity_type, "Collection"):

if not schema_manager.entity_type_instanceof(entity_type, "Collection"):
abort_bad_req(f"{entity_type.title()} with id {id} is not a collection")

# Determine if the entity is publicly visible base on its data, only.
9 changes: 8 additions & 1 deletion src/lib/constraints/__init__.py
@@ -3,6 +3,7 @@
from lib.constraints.sample import *
from lib.constraints.dataset import *
from lib.constraints.publication import *
from lib.constraints.epicollection import *
from deepdiff import DeepDiff

from atlas_consortia_commons.rest import rest_ok, rest_response, StatusCodes, rest_bad_req
@@ -21,18 +22,24 @@ def build_sample_constraints(entity) -> list:
def build_dataset_constraints(entity) -> list:
return build_all_dataset_constraints(entity)


def build_publication_constraints(entity) -> list:
return build_all_publication_constraints(entity)


def build_epicollection_constraints(entity) -> list:
return build_all_epicollection_constraints(entity)


def determine_constraint_from_entity(constraint_unit, use_case=None) -> dict:
entity_type = constraint_unit.get('entity_type', '')
entity_type = entity_type.lower()
sub_type = constraint_unit.get('sub_type')
error = None
constraints = []
entities = Ontology.ops(as_arr=True, cb=enum_val_lower).entities()

# Need to manually add Epicollection
entities.append('epicollection')
if entity_type not in entities:
error = f"No `entity_type` found with value `{entity_type}`"
else:
21 changes: 21 additions & 0 deletions src/lib/constraints/epicollection.py
@@ -0,0 +1,21 @@
from lib.constraints.base import build_constraint, build_constraint_unit, build_search_constraint_unit
from lib.ontology import Ontology


# can be the descendant of / --->
def build_all_epicollection_constraints(entity):

ancestor = build_constraint_unit(Ontology.ops().entities().DATASET)
descendant = build_constraint_unit(entity)

return [
build_constraint(ancestor, [descendant])
]

def build_epicollection_search_constraints(entity):
descendant = build_constraint_unit(entity)
ancestor = build_search_constraint_unit('entity_type.keyword', Ontology.ops().entities().DATASET)

return [
build_constraint([ancestor], [descendant])
]
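
A rough usage sketch, not part of this PR: the builders above encode that an Epicollection may only descend from Datasets. The string argument below is an assumption for illustration; in the app the value flows in from determine_constraint_from_entity, and the Ontology singleton must already have been initialized by the application.

from lib.constraints.epicollection import build_all_epicollection_constraints

# Sketch only: returns a single constraint pairing a Dataset ancestor
# unit with the Epicollection descendant unit
constraints = build_all_epicollection_constraints('Epicollection')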
13 changes: 12 additions & 1 deletion src/schema/provenance_schema.yaml
@@ -262,7 +262,7 @@ ENTITIES:
source: false
target: false
# Collection doesn't actually need data_access_level property
properties:
properties: &shared_collection_properties
<<: *shared_properties
<<: *shared_entity_properties
# Because Collection-specific validation is needed for some
@@ -1264,3 +1264,14 @@ ENTITIES:
type: boolean
description: 'Determines if the datasets of an upload are all published.'
on_index_trigger: get_has_all_published_datasets

############################################# EPICollection #############################################
Epicollection:
# This superclass property is optional
superclass: Collection
# Epicollection can not be a derivation source or target
derivation:
source: false
target: false
properties:
<<: *shared_collection_properties
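
The `&shared_collection_properties` anchor added above and the `<<:` merge key here are what let Epicollection inherit every Collection property. A minimal standalone sketch of the mechanics (illustrative YAML, not from this schema; PyYAML resolves YAML 1.1 merge keys on load):

import yaml  # PyYAML

doc = """
Collection:
  properties: &shared_collection_properties
    uuid: {type: string}
    title: {type: string}
Epicollection:
  properties:
    <<: *shared_collection_properties
"""

data = yaml.safe_load(doc)
# The merge key copies every anchored Collection property into Epicollection
assert data['Epicollection']['properties'] == data['Collection']['properties']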
2 changes: 1 addition & 1 deletion src/schema/schema_constants.py
@@ -23,7 +23,7 @@ class SchemaConstants(object):

DOI_BASE_URL = 'https://doi.org/'

ALLOWED_SINGLE_CREATION_ACTIONS = ['central process', 'lab process']
ALLOWED_SINGLE_CREATION_ACTIONS = ['central process', 'lab process', 'external process']
ALLOWED_MULTI_CREATION_ACTIONS = ['multi-assay split']

ALLOWED_DATASET_STATUSES = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete']
37 changes: 35 additions & 2 deletions src/schema/schema_neo4j_queries.py
@@ -107,6 +107,39 @@ def get_dataset_direct_descendants(neo4j_driver, uuid, property_key=None, match_

return results


"""
Get the uuids of each entity in a list that does not belong to a given entity type. The uuids are grouped by entity type

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
direct_ancestor_uuids : list
List of the uuids to be filtered
entity_type : string
The entity to be excluded

Returns
-------
list
A list of records, one per entity type, each with the entity_type and the collected uuids of entities that are not of the excluded type; None if there are no such entities
"""


def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type):
query = (f"MATCH (e:Entity) "
f"WHERE e.uuid in {direct_ancestor_uuids} AND toLower(e.entity_type) <> '{entity_type.lower()}' "
f"RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids")
logger.info("======filter_ancestors_by_type======")
logger.info(query)

with neo4j_driver.session() as session:
records = session.run(query).data()

return records if records else None
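
The f-string interpolation above works because a Python list literal is also valid Cypher syntax, but a parameterized query avoids quoting and injection pitfalls. A minimal alternative sketch, assuming the same neo4j.Driver object (not part of this PR):

def filter_ancestors_by_type_parameterized(neo4j_driver, direct_ancestor_uuids, entity_type):
    # $uuids and $entity_type are bound by the driver rather than being
    # formatted into the query text
    query = ("MATCH (e:Entity) "
             "WHERE e.uuid IN $uuids AND toLower(e.entity_type) <> $entity_type "
             "RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids")

    with neo4j_driver.session() as session:
        records = session.run(query, uuids=direct_ancestor_uuids,
                              entity_type=entity_type.lower()).data()

    return records if records else None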


"""
Get the origin (organ) sample ancestor of a given entity by uuid

@@ -768,7 +801,7 @@ def get_dataset_upload(neo4j_driver, uuid, property_key=None):
def get_collection_entities(neo4j_driver, uuid):
results = []

query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) "
query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection|Epicollection) "
f"WHERE c.uuid = '{uuid}' "
f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")

Expand Down Expand Up @@ -1370,7 +1403,7 @@ def _delete_publication_associated_collection_linkages_tx(tx, uuid):


def _delete_collection_linkages_tx(tx, uuid):
query = (f"MATCH (d:Dataset)-[in:IN_COLLECTION]->(c:Collection)"
query = (f"MATCH (e:Entity)-[in:IN_COLLECTION]->(c:Collection)"
f" WHERE c.uuid = '{uuid}' "
f" DELETE in")

5 changes: 5 additions & 0 deletions src/schema/schema_triggers.py
@@ -927,6 +927,11 @@ def link_collection_to_entities(property_key, normalized_type, user_token, exist
schema_neo4j_queries.link_collection_to_entities(neo4j_driver=schema_manager.get_neo4j_driver_instance(),
collection_uuid=existing_data_dict['uuid'],
entities_uuid_list=entity_uuids)

# Delete the cache of each associated dataset and the collection itself if any cache exists,
# because the `Dataset.collections` field and the `Collection.datasets` field are affected
uuids_list = [existing_data_dict['uuid']] + entity_uuids
schema_manager.delete_memcached_cache(uuids_list)
except TransactionError:
# No need to log
raise
81 changes: 77 additions & 4 deletions src/schema/schema_validators.py
@@ -63,6 +63,71 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request
raise ValueError(f"The {property_key} field must only contain unique items")


"""
Validate every entity exists and (optionally) is a Dataset

Parameters
----------
property_key : str
The target property key
normalized_entity_type : str
The normalized entity type of the entity being validated
request: Flask request object
The instance of Flask request passed in from application request
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
The json data in request body, already after the regular validations
"""
def collection_entities_are_existing_entities(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
# `entity_uuids` is required for creating a Collection
# Verify each UUID specified exists in the uuid-api, exists in Neo4j, and (optionally) is for a Dataset before
# proceeding with creation of Collection.
bad_entities_uuids = []
for entity_uuid in new_data_dict['entity_uuids']:
try:
# The following code duplicates some functionality existing in app.py, in
# query_target_entity(), which also deals with caching. In the future, the
# validation logic shared by this file and app.py should become a utility
# module, shared by validators as well as app.py. But for now, the code
# is repeated for the following.

# Get cached ids if exist otherwise retrieve from UUID-API. Expect an
# Exception to be raised if not found.
entity_detail = schema_manager.get_sennet_ids(id=entity_uuid)
entity_uuid = entity_detail['uuid']

# If the uuid exists per the uuid-api, make sure it also exists as a Neo4j entity.
entity_dict = schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance(), entity_uuid)

# If the entity_uuid is not found in Neo4j, fail the validation.
if not entity_dict:
logger.info(f"Request for {entity_uuid} inclusion in Collection, "
"but not found in Neo4j.")
bad_entities_uuids.append(entity_uuid)
continue

# Collections can have other entity types besides Dataset, so skip the Dataset check
if normalized_entity_type == 'Collection':
continue

if entity_dict['entity_type'] != 'Dataset':
logger.info(f"Request for {entity_uuid} inclusion in Collection, "
f"but entity_type={entity_dict['entity_type']}, not Dataset.")
bad_entities_uuids.append(entity_uuid)
except Exception:
# If the entity_uuid is not found, fail the validation.
logger.info(f"Request for {entity_uuid} inclusion in Collection "
"failed uuid-api retrieval.")
bad_entities_uuids.append(entity_uuid)

# If any uuids in the request entities_uuids are not for an existing Dataset entity which
# exists in uuid-api and Neo4j, raise an Exception so the validation fails and the
# operation can be rejected.
if bad_entities_uuids:
raise ValueError(f"Unable to find Datasets for {bad_entities_uuids}.")


"""
If an entity has a DOI, do not allow it to be updated
"""
@@ -490,13 +555,21 @@ def validate_publication_date(property_key, normalized_entity_type, request, exi


def validate_creation_action(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
accepted_creation_action_values = SchemaConstants.ALLOWED_SINGLE_CREATION_ACTIONS
creation_action = new_data_dict.get(property_key)
if creation_action and creation_action.lower() not in accepted_creation_action_values:
raise ValueError("Invalid {} value. Accepted values are: {}".format(property_key, ", ".join(accepted_creation_action_values)))
creation_action = new_data_dict[property_key].lower()  # raises KeyError if not found
if creation_action == '':
raise ValueError(f"The property {property_key} cannot be empty, when specified.")

accepted_creation_action_values = SchemaConstants.ALLOWED_SINGLE_CREATION_ACTIONS
if creation_action not in accepted_creation_action_values:
raise ValueError("Invalid {} value. Accepted values are: {}".format(property_key, ", ".join(accepted_creation_action_values)))

if creation_action == 'external process':
direct_ancestor_uuids = new_data_dict.get('direct_ancestor_uuids')
entity_types_dict = schema_neo4j_queries.filter_ancestors_by_type(schema_manager.get_neo4j_driver_instance(), direct_ancestor_uuids, "dataset")
if entity_types_dict:
raise ValueError("If 'creation_action' field is given and is 'external process', all ancestor uuids must belong to datasets. "
f"The following entities belong to non-dataset entities: {entity_types_dict}")


"""
Validate the provided value of the activity creation action before updating direct ancestors. Certain values prohibited
8 changes: 2 additions & 6 deletions test/test_schema_validators.py
@@ -1,7 +1,3 @@
import test

test.cwd_to_src()

import pytest

from schema import schema_validators
@@ -12,7 +8,7 @@
('central process', True),
('Lab Process', True),
('lab process', True),
(None, True),
(None, False),
('Multi-Assay Split', False),
('multi-assay split', False),
('', False),
@@ -38,7 +34,7 @@ def test_validate_single_creation_action(creation_action, succeeds):
)
else:
# Test invalid creation action
with pytest.raises(ValueError):
with pytest.raises((ValueError, KeyError)):
schema_validators.validate_creation_action(
property_key, normalized_entity_type, request,
existing_data_dict, new_data_dict