sennetconsortium · maxsibilla · Oct 10, 2024 · Oct 9, 2024 · Oct 9, 2024 · Oct 9, 2024
@@ -558,7 +558,7 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid):
         raise TransactionError(msg)
 
 
-def get_ancestors(neo4j_driver, uuid, property_key=None):
+def get_ancestors(neo4j_driver, uuid, data_access_level=None, property_key=None):
     """Get all ancestors by uuid.
 
     Parameters
@@ -567,6 +567,8 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
         The neo4j database connection pool
     uuid : str
         The uuid of target entity
+    data_access_level : Optional[str]
+        The data access level of the ancestor entities (public or consortium). None returns all ancestors.
     property_key : str
         A target property key for result filtering
 
@@ -577,17 +579,21 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
     """
     results = []
 
+    predicate = ''
+    if data_access_level:
+        predicate = f"AND ancestor.data_access_level = '{data_access_level}' "
+
     if property_key:
         query = (f"MATCH (e:Entity)-[:USED|WAS_GENERATED_BY*]->(ancestor:Entity) "
                  # Filter out the Lab entities
-                 f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
+                 f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' {predicate}"
                  # COLLECT() returns a list
                  # apoc.coll.toSet() reruns a set containing unique nodes
                  f"RETURN apoc.coll.toSet(COLLECT(ancestor.{property_key})) AS {record_field_name}")
     else:
         query = (f"MATCH (e:Entity)-[:USED|WAS_GENERATED_BY*]->(ancestor:Entity) "
                  # Filter out the Lab entities
-                 f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
+                 f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' {predicate}"
                  # COLLECT() returns a list
                  # apoc.coll.toSet() reruns a set containing unique nodes
                  f"RETURN apoc.coll.toSet(COLLECT(ancestor)) AS {record_field_name}")
@@ -614,7 +620,7 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
     return results
 
 
-def get_descendants(neo4j_driver, uuid, property_key=None):
+def get_descendants(neo4j_driver, uuid, data_access_level=None, property_key=None):
     """ Get all descendants by uuid
 
     Parameters
@@ -623,6 +629,8 @@ def get_descendants(neo4j_driver, uuid, property_key=None):
         The neo4j database connection pool
     uuid : str
         The uuid of target entity
+    data_access_level : Optional[str]
+        The data access level of the descendant entities (public or consortium). None returns all descendants.
     property_key : str
         A target property key for result filtering
 
@@ -633,17 +641,21 @@ def get_descendants(neo4j_driver, uuid, property_key=None):
     """
     results = []
 
+    predicate = ''
+    if data_access_level:
+        predicate = f"AND descendant.data_access_level = '{data_access_level}' "
+
     if property_key:
         query = (f"MATCH (e:Entity)<-[:USED|WAS_GENERATED_BY*]-(descendant:Entity) "
                  # The target entity can't be a Lab
-                 f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' "
+                 f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' {predicate}"
                  # COLLECT() returns a list
                  # apoc.coll.toSet() reruns a set containing unique nodes
                  f"RETURN apoc.coll.toSet(COLLECT(descendant.{property_key})) AS {record_field_name}")
     else:
         query = (f"MATCH (e:Entity)<-[:USED|WAS_GENERATED_BY*]-(descendant:Entity) "
                  # The target entity can't be a Lab
-                 f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' "
+                 f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' {predicate}"
                  # COLLECT() returns a list
                  # apoc.coll.toSet() reruns a set containing unique nodes
                  f"RETURN apoc.coll.toSet(COLLECT(descendant)) AS {record_field_name}")

@@ -239,6 +239,12 @@ shared_entity_properties: &shared_entity_properties
     before_property_update_validators:
       - validate_application_header_before_property_update
     description: "The metadata returned from the processing at data submission time."
+  has_metadata:
+    type: string
+    generated: true # Disallow entry from users via POST
+    immutable: true # Disallow update via PUT
+    description: "True if the entity has metadata associated with it. Otherwise False."
+    on_index_trigger: get_has_metadata
   was_attributed_to:
     type: list
     description: "Attribution is the ascribing of an entity to an agent."
@@ -351,6 +357,17 @@ ENTITIES:
   Dataset:
     # Only allowed applications can create new Dataset via POST
     before_entity_create_validator: validate_application_header_before_entity_create
+    excluded_properties_from_public_response:
+      - lab_dataset_id
+      - sources:
+          - lab_source_id
+      - ingest_metadata:
+          - metadata:
+              - lab_id
+              - slide_id
+      - cedar_mapped_metadata:
+          - Lab ID
+          - Slide ID
     # Dataset can be either derivation source or target
     derivation:
       source: true
@@ -751,6 +768,9 @@ ENTITIES:
     derivation:
       source: false
       target: true
+    excluded_properties_from_public_response:
+      - lab_source_id
+      - label
     properties:
       <<: *shared_properties
       <<: *shared_entity_properties
@@ -869,6 +889,20 @@ ENTITIES:
     derivation:
       source: true
       target: true
+    excluded_properties_from_public_response:
+      - lab_tissue_sample_id
+      - origin_sample:
+          - lab_tissue_sample_id
+      - origin_samples:
+          - lab_tissue_sample_id
+      - source:
+          - lab_source_id
+      - metadata:
+          - lab_id
+          - slide_id
+      - cedar_mapped_metadata:
+          - Lab ID
+          - Slide ID
     properties:
       <<: *shared_properties
       <<: *shared_entity_properties

@@ -298,6 +298,77 @@ def get_all_entity_types():
     return list(dict_keys)
 
 
+def get_fields_to_exclude(normalized_class=None):
+    """Return the public-response exclusion list configured for an entity class.
+
+    Reads ``excluded_properties_from_public_response`` from the entity's
+    section (under ``ENTITIES``) of the provenance schema yaml.
+
+    Parameters
+    ----------
+    normalized_class : Optional[str]
+        The normalized entity type of the entity whose fields are to be removed.
+        ``None``, or a class with no entry under ``ENTITIES``, yields an empty list.
+
+    Returns
+    -------
+    list
+        A new list of exclusion entries. Each entry is whatever the yaml
+        declares: a plain field name, or a mapping describing nested fields
+        (the format consumed by exclude_properties_from_response).
+    """
+    # Look the class up defensively: the default argument (None) and unknown
+    # classes would otherwise raise KeyError on direct indexing.
+    entity_section = _schema['ENTITIES'].get(normalized_class) or {}
+    exclude_list = entity_section.get('excluded_properties_from_public_response')
+
+    # Hand back a copy so callers can't alter the loaded schema through the result
+    return list(exclude_list) if exclude_list else []
+
+
+def exclude_properties_from_response(excluded_fields, output_dict):
+    """Removes specified fields from an existing dictionary.
+
+    The dictionary is modified in place and the same object is returned.
+
+    Parameters
+    ----------
+    excluded_fields : list
+        The fields to be excluded. An entry is either a field name (removed from
+        the top level) or a mapping of a parent key to a list of field names
+        and/or further mappings, describing fields nested under that parent.
+    output_dict : dict
+        A dictionary representing the data to be modified
+
+    Returns
+    -------
+    dict
+        The modified data with removed fields
+    """
+    def remove_key(container, key):
+        # Drop `key` from a dict, or from every dict inside a list. Anything else
+        # (None, strings, numbers) is left alone instead of raising TypeError.
+        if isinstance(container, dict):
+            container.pop(key, None)
+        elif isinstance(container, list):
+            for item in container:
+                if isinstance(item, dict):
+                    item.pop(key, None)
+
+    def delete_nested_field(data, nested_path):
+        if isinstance(nested_path, dict):
+            if isinstance(data, list):
+                # Apply the same nested spec to every element of a list value
+                for item in data:
+                    delete_nested_field(item, nested_path)
+                return
+            if not isinstance(data, dict):
+                # Null or scalar where a nested object was expected: nothing to strip
+                return
+
+            for key, value in nested_path.items():
+                if key not in data:
+                    continue
+                if isinstance(value, list):
+                    for nested_field in value:
+                        if isinstance(nested_field, dict):
+                            delete_nested_field(data[key], nested_field)
+                        else:
+                            remove_key(data[key], nested_field)
+                elif isinstance(value, dict):
+                    delete_nested_field(data[key], value)
+
+        elif isinstance(data, dict) and nested_path in data:
+            if isinstance(data[nested_path], list):
+                # NOTE(review): when a top-level excluded field holds a list, the
+                # key itself is kept and only same-named keys inside the list's
+                # dict items are removed. Preserved as-is; confirm this is intended.
+                remove_key(data[nested_path], nested_path)
+            else:
+                del data[nested_path]
+
+    for field in excluded_fields:
+        delete_nested_field(output_dict, field)
+
+    return output_dict
+
+
 """
 Generating triggered data based on the target events and methods
 
@@ -2045,6 +2116,12 @@ def generate_activity_data(normalized_entity_type, user_token, user_info_dict, c
     return generated_activity_data_dict
 
 
+
+
+
+
+
+
 """
 Get the ingest-api URL to be used by trigger methods
 

@@ -1295,6 +1295,55 @@ def link_to_previous_revision(property_key, normalized_type, user_token, existin
         raise
 
 
+def get_has_metadata(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
+    """Trigger event method for determining if the entity has metadata.
+
+    Parameters
+    ----------
+    property_key : str
+        The target property key
+    normalized_type : str
+        One of the types defined in the schema yaml
+    user_token: str
+        The user's globus nexus token
+    existing_data_dict : dict
+        A dictionary that contains all existing entity properties
+    new_data_dict : dict
+        A merged dictionary that contains all possible input data to be used
+
+    Returns
+    -------
+    Tuple[str, Optional[str]]
+        str: The target property key
+        Optional[str]: "True" or "False" if the entity has metadata, or None
+        when the entity matches none of the types/categories checked below
+
+    Raises
+    ------
+    KeyError
+        If 'uuid' or 'entity_type' is missing from existing_data_dict
+    """
+    if 'uuid' not in existing_data_dict:
+        msg = create_trigger_error_msg(
+            "Missing 'uuid' key in 'existing_data_dict' during calling 'get_has_metadata()' trigger method.",
+            existing_data_dict, new_data_dict
+        )
+        raise KeyError(msg)
+
+    if equals(Ontology.ops().entities().DATASET, existing_data_dict['entity_type']):
+        # Datasets keep their metadata nested under 'ingest_metadata'. The key may
+        # be present with a null value; treat that the same as an absent key
+        # rather than failing on the membership test.
+        ingest_metadata = existing_data_dict.get('ingest_metadata') or {}
+        has_metadata = 'metadata' in ingest_metadata
+        return property_key, str(has_metadata)
+
+    SpecimenCategories = Ontology.ops().specimen_categories()
+    if (
+        equals(Ontology.ops().entities().SOURCE, existing_data_dict['entity_type'])
+        or equals('Collection', existing_data_dict['entity_type'])
+        or equals('Publication', existing_data_dict['entity_type'])
+        or equals(SpecimenCategories.BLOCK, existing_data_dict.get('sample_category'))
+        or equals(SpecimenCategories.SECTION, existing_data_dict.get('sample_category'))
+        or equals(SpecimenCategories.SUSPENSION, existing_data_dict.get('sample_category'))
+    ):
+        # These entities carry 'metadata' directly on the entity itself
+        has_metadata = 'metadata' in existing_data_dict
+        return property_key, str(has_metadata)
+
+    # Anything else (e.g. a sample category not listed above) gets no value
+    return property_key, None
+
+
 def get_source_mapped_metadata(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
     """Trigger event method of auto generating mapped metadata from 'living_donor_data' or 'organ_donor_data'.
 

@@ -55,7 +55,6 @@
       "uuid": "5fa78f4fa272db58a7fcc7590376f5e7",
       "source": {
         "metadata": {},
-        "lab_source_id": "Human Source 1",
         "group_name": "CODCC Testing Group",
         "sennet_id": "SNT522.GDLF.724",
         "last_modified_timestamp": 1681844922032,
@@ -83,7 +82,6 @@
       "entity_type": "Sample",
       "group_name": "CODCC Testing Group",
       "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
-      "lab_tissue_sample_id": "Human Blood",
       "last_modified_timestamp": 1681828779121,
       "last_modified_user_displayname": "Test User",
       "last_modified_user_email": "[email protected]",
@@ -98,7 +96,6 @@
         "entity_type": "Sample",
         "group_name": "CODCC Testing Group",
         "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
-        "lab_tissue_sample_id": "Human Blood",
         "last_modified_timestamp": 1681828779121,
         "last_modified_user_displayname": "Test User",
         "last_modified_user_email": "[email protected]",
@@ -113,7 +110,6 @@
       "uuid": "cf3d0408de9afd703c8bd71808176b38",
       "source": {
         "metadata": {},
-        "lab_source_id": "Human Source 1",
         "group_name": "CODCC Testing Group",
         "sennet_id": "SNT522.GDLF.724",
         "last_modified_timestamp": 1681844922032,
@@ -142,7 +138,6 @@
       "entity_type": "Source",
       "group_name": "CODCC Testing Group",
       "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
-      "lab_source_id": "Human Source 1",
       "last_modified_timestamp": 1681844922032,
       "last_modified_user_displayname": "Test User",
       "last_modified_user_email": "[email protected]",

@@ -13,7 +13,6 @@
       "entity_type": "Dataset",
       "group_name": "CODCC Testing Group",
       "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
-      "lab_dataset_id": "Vitessce example - secondary dataset",
       "last_modified_timestamp": 1681841991272,
       "last_modified_user_displayname": "Test User",
       "last_modified_user_email": "[email protected]",
@@ -26,7 +25,6 @@
       "sources": [
         {
           "metadata": {},
-          "lab_source_id": "Human Source 1",
           "group_name": "CODCC Testing Group",
           "sennet_id": "SNT522.GDLF.724",
           "last_modified_timestamp": 1681844922032,

@@ -49,7 +49,6 @@
     "entity_type": "Dataset",
     "group_name": "University of Pittsburgh TMC",
     "group_uuid": "28db7a2b-ed8a-11ec-8b0a-9fe9b51132b1",
-    "lab_dataset_id": "897-Dataset",
     "last_modified_timestamp": 1683227917901,
     "last_modified_user_displayname": "Test User",
     "last_modified_user_email": "[email protected]",