From d2d0134844badf45fa2081ac124654c9e021a53b Mon Sep 17 00:00:00 2001
From: Tyler Madonna <tjm159@pitt.edu>
Date: Wed, 9 Oct 2024 11:08:51 -0400
Subject: [PATCH 1/5] Adding excluded properties from public response

---
 src/app.py                        | 168 +++++++++++++++++++++++++++---
 src/schema/provenance_schema.yaml |  28 +++++
 src/schema/schema_manager.py      |  77 ++++++++++++++
 3 files changed, 259 insertions(+), 14 deletions(-)
diff --git a/src/app.py b/src/app.py
index d273b6a..59b3c0b 100644
--- a/src/app.py
+++ b/src/app.py
@@ -446,18 +446,22 @@ def get_ancestor_organs(id):
     # since public entities don't require user token
     token = get_internal_token()
 
+    public_entity = True
     if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
         # Only published/public datasets don't require token
         if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
             # Token is required and the user must belong to SenNet-READ group
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     else:
         # The `data_access_level` of Sample can only be either 'public' or 'consortium'
         if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
 
     # By now, either the entity is public accessible or the user token has the correct access level
     organs = app_neo4j_queries.get_ancestor_organs(neo4j_driver_instance, entity_dict['uuid'])
+    excluded_fields = schema_manager.get_fields_to_exclude('Sample')
 
     # Skip executing the trigger method to get Sample.direct_ancestor
     properties_to_skip = ['direct_ancestor']
@@ -466,6 +470,12 @@ def get_ancestor_organs(id):
     # Final result after normalization
     final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    if public_entity and not user_in_sennet_read_group(request):
+        filtered_organs_list = []
+        for organ in final_result:
+            filtered_organs_list.append(schema_manager.exclude_properties_from_response(excluded_fields, organ))
+        final_result = filtered_organs_list
+
     return jsonify(final_result)
 
 
@@ -540,6 +550,7 @@ def get_entity_by_id(id):
     # Query target entity against uuid-api and neo4j and return as a dict if exists
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
+    fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type)
 
     # Use the internal token to query the target entity
     # since public entities don't require user token
@@ -553,11 +564,13 @@ def get_entity_by_id(id):
     # Determine if the entity is publicly visible base on its data, only.
     entity_scope = _get_entity_visibility(normalized_entity_type=normalized_entity_type,
                                           entity_dict=complete_dict)
+    public_entity = False
 
     # Initialize the user as authorized if the data is public.  Otherwise, the
     # user is not authorized and credentials must be checked.
     if entity_scope == DataVisibilityEnum.PUBLIC:
         user_authorized = True
+        public_entity = True
     else:
         # It's highly possible that there's no token provided
         user_token = get_user_token(request)
@@ -609,6 +622,8 @@ def get_entity_by_id(id):
             abort_bad_req("The specified query string is not supported. Use '?property=<key>' to filter the result")
     else:
         # Response with the dict
+        if public_entity and not user_in_sennet_read_group(request):
+            final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
         return jsonify(final_result)
 
 
@@ -678,9 +693,8 @@ def get_entities_by_ids_for_dashboard(entity_type: str, json_data_dict: dict):
     Metadata for the entity appropriate for an OpenSearch document, and filtered by an additional
     `property` arguments in the HTTP request.
 """
-@app.route('/documents/<id>', methods = ['GET'])
+@app.route('/documents/<id>', methods=['GET'])
 def get_document_by_id(id):
-
     result_dict = _get_metadata_by_id(entity_id=id, metadata_scope=MetadataScopeEnum.INDEX)
     return jsonify(result_dict)
 
@@ -1573,6 +1587,7 @@ def get_ancestors(id):
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
     uuid = entity_dict['uuid']
+    public_entity = True
 
     # Collection doesn't have ancestors via Activity nodes
     if normalized_entity_type == 'Collection':
@@ -1583,10 +1598,12 @@ def get_ancestors(id):
         if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
             # Token is required and the user must belong to SenNet-READ group
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     elif normalized_entity_type == 'Sample':
         # The `data_access_level` of Sample can only be either 'public' or 'consortium'
         if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     else:
         # Source and Upload will always get back an empty list
         # becuase their direct ancestor is Lab, which is being skipped by Neo4j query
@@ -1639,6 +1656,16 @@ def get_ancestors(id):
         # Final result after normalization
         final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list, properties_to_include=['protocol_url'])
 
+        if public_entity and not user_in_sennet_read_group(request):
+            filtered_final_result = []
+            for ancestor in final_result:
+                ancestor_entity_type = ancestor.get('entity_type')
+                fields_to_exclude = schema_manager.get_fields_to_exclude(ancestor_entity_type)
+                filtered_ancestor = schema_manager.exclude_properties_from_response(fields_to_exclude, ancestor)
+                filtered_final_result.append(filtered_ancestor)
+
+            final_result = filtered_final_result
+
     return jsonify(final_result)
 
 
@@ -1753,6 +1780,7 @@ def get_parents(id):
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
     uuid = entity_dict['uuid']
+    public_entity = True
 
     # Collection doesn't have ancestors via Activity nodes
     if normalized_entity_type == 'Collection':
@@ -1763,10 +1791,12 @@ def get_parents(id):
         if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
             # Token is required and the user must belong to SenNet-READ group
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     elif normalized_entity_type == 'Sample':
         # The `data_access_level` of Sample can only be either 'public' or 'consortium'
         if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     else:
         # Source and Upload will always get back an empty list
         # becuase their direct ancestor is Lab, which is being skipped by Neo4j query
@@ -1819,6 +1849,17 @@ def get_parents(id):
         # Final result after normalization
         final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+        filtered_final_result = []
+        for parent in final_result:
+            parent_entity_type = parent.get('entity_type')
+            fields_to_exclude = schema_manager.get_fields_to_exclude(parent_entity_type)
+            if public_entity and not user_in_sennet_read_group(request):
+                filtered_parent = schema_manager.exclude_properties_from_response(fields_to_exclude, parent)
+                filtered_final_result.append(filtered_parent)
+            else:
+                filtered_final_result.append(parent)
+        final_result = filtered_final_result
+
     return jsonify(final_result)
 
 
@@ -1933,6 +1974,7 @@ def get_siblings(id):
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
     uuid = entity_dict['uuid']
+    public_entity = True
 
     # Collection doesn't have ancestors via Activity nodes
     if normalized_entity_type == 'Collection':
@@ -1943,10 +1985,12 @@ def get_siblings(id):
         if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
             # Token is required and the user must belong to SenNet-READ group
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     elif normalized_entity_type == 'Sample':
         # The `data_access_level` of Sample can only be either 'public' or 'consortium'
         if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     else:
         # Source and Upload will always get back an empty list
         # becuase their direct ancestor is Lab, which is being skipped by Neo4j query
@@ -2011,6 +2055,17 @@ def get_siblings(id):
     # Final result after normalization
     final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    filtered_final_result = []
+    for sibling in final_result:
+        sibling_entity_type = sibling.get('entity_type')
+        fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type)
+        if public_entity and not user_in_sennet_read_group(request):
+            filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling)
+            filtered_final_result.append(filtered_sibling)
+        else:
+            filtered_final_result.append(sibling)
+    final_result = filtered_final_result
+
     return jsonify(final_result)
 
 
@@ -2049,6 +2104,7 @@ def get_tuplets(id):
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
     uuid = entity_dict['uuid']
+    public_entity = True
 
     # Collection doesn't have ancestors via Activity nodes
     if normalized_entity_type == 'Collection':
@@ -2059,10 +2115,12 @@ def get_tuplets(id):
         if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
             # Token is required and the user must belong to SenNet-READ group
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     elif normalized_entity_type == 'Sample':
         # The `data_access_level` of Sample can only be either 'public' or 'consortium'
         if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     else:
         # Source and Upload will always get back an empty list
         # becuase their direct ancestor is Lab, which is being skipped by Neo4j query
@@ -2117,6 +2175,17 @@ def get_tuplets(id):
     # Final result after normalization
     final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    filtered_final_result = []
+    for tuplet in final_result:
+        tuple_entity_type = tuplet.get('entity_type')
+        fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type)
+        if public_entity and not user_in_sennet_read_group(request):
+            filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet)
+            filtered_final_result.append(filtered_tuplet)
+        else:
+            filtered_final_result.append(tuplet)
+    final_result = filtered_final_result
+
     return jsonify(final_result)
 
 
@@ -2500,7 +2569,9 @@ def get_dataset_latest_revision(id):
     # Query target entity against uuid-api and neo4j and return as a dict if exists
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
+    fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type)
     uuid = entity_dict['uuid']
+    public_entity = True
 
     # Only for Dataset
     if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
@@ -2512,7 +2583,7 @@ def get_dataset_latest_revision(id):
     if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
         # Token is required and the user must belong to SenNet-READ group
         token = get_user_token(request, non_public_access_required=True)
-
+        public_entity = False
         latest_revision_dict = app_neo4j_queries.get_dataset_latest_revision(neo4j_driver_instance, uuid)
     else:
         # Default to the latest "public" revision dataset
@@ -2539,6 +2610,9 @@ def get_dataset_latest_revision(id):
     # Also normalize the result based on schema
     final_result = schema_manager.normalize_object_result_for_response('ENTITIES', complete_dict)
 
+    # Strip restricted fields only for viewers of a public dataset who are NOT in the SenNet-READ group
+    if public_entity and not user_in_sennet_read_group(request):
+        final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)
     # Response with the dict
     return jsonify(final_result)
 
@@ -2750,10 +2824,13 @@ def get_revisions_list(id):
     ]
     complete_revisions_list = schema_manager.get_complete_entities_list(token, sorted_revisions_list, properties_to_skip)
     normalized_revisions_list = schema_manager.normalize_entities_list_for_response(complete_revisions_list)
+    fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type)
 
     # Only check the very last revision (the first revision dict since normalized_revisions_list is already sorted DESC)
     # to determine if send it back or not
+    is_in_read_group = True
     if not user_in_globus_read_group(request):
+        is_in_read_group = False
         latest_revision = normalized_revisions_list[0]
 
         if latest_revision['status'].lower() != DATASET_STATUS_PUBLISHED:
@@ -2773,6 +2850,8 @@ def get_revisions_list(id):
         }
         if show_dataset:
             result['dataset'] = revision
+            if not is_in_read_group:
+                result['dataset'] = schema_manager.exclude_properties_from_response(fields_to_exclude, revision)
         results.append(result)
         revision_number -= 1
 
@@ -2904,11 +2983,14 @@ def get_associated_organs_from_dataset(id):
     # Use the internal token to query the target entity
     # since public entities don't require user token
     token = get_internal_token()
+    excluded_fields = schema_manager.get_fields_to_exclude('Sample')
+    public_entity = True
 
     # published/public datasets don't require token
     if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
         # Token is required and the user must belong to SenNet-READ group
         token = get_user_token(request, non_public_access_required=True)
+        public_entity = False
 
     # By now, either the entity is public accessible or
     # the user token has the correct access level
@@ -2924,6 +3006,12 @@ def get_associated_organs_from_dataset(id):
     # Final result after normalization
     final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    if public_entity and not user_in_sennet_read_group(request):
+        filtered_organs_list = []
+        for organ in final_result:
+            filtered_organs_list.append(schema_manager.exclude_properties_from_response(excluded_fields, organ))
+        final_result = filtered_organs_list
+
     return jsonify(final_result)
 
 
@@ -2951,6 +3039,7 @@ def get_associated_samples_from_dataset(id):
     # Query target entity against uuid-api and neo4j and return as a dict if exists
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
+    excluded_fields = schema_manager.get_fields_to_exclude('Sample')
 
     # Only for Dataset
     if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
@@ -2959,11 +3048,13 @@ def get_associated_samples_from_dataset(id):
     # Use the internal token to query the target entity
     # since public entities don't require user token
     token = get_internal_token()
+    public_entity = True
 
     # published/public datasets don't require token
     if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
         # Token is required and the user must belong to SenNet-READ group
         token = get_user_token(request, non_public_access_required=True)
+        public_entity = False
 
     # By now, either the entity is public accessible or the user token has the correct access level
     associated_samples = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance, entity_dict['uuid'])
@@ -2978,6 +3069,12 @@ def get_associated_samples_from_dataset(id):
     # Final result after normalization
     final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    if public_entity and not user_in_sennet_read_group(request):
+        filtered_sample_list = []
+        for sample in final_result:
+            filtered_sample_list.append(schema_manager.exclude_properties_from_response(excluded_fields, sample))
+        final_result = filtered_sample_list
+
     return jsonify(final_result)
 
 
@@ -3005,6 +3102,7 @@ def get_associated_sources_from_dataset(id):
     # Query target entity against uuid-api and neo4j and return as a dict if exists
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
+    excluded_fields = schema_manager.get_fields_to_exclude('Source')
 
     # Only for Dataset
     if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
@@ -3013,11 +3111,13 @@ def get_associated_sources_from_dataset(id):
     # Use the internal token to query the target entity
     # since public entities don't require user token
     token = get_internal_token()
+    public_entity = True
 
     # published/public datasets don't require token
     if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
         # Token is required and the user must belong to SenNet-READ group
         token = get_user_token(request, non_public_access_required=True)
+        public_entity = False
 
     # By now, either the entity is public accessible or the user token has the correct access level
     associated_sources = app_neo4j_queries.get_associated_sources_from_dataset(neo4j_driver_instance, entity_dict['uuid'])
@@ -3032,6 +3132,12 @@ def get_associated_sources_from_dataset(id):
     # Final result after normalization
     final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+    if public_entity and not user_in_sennet_read_group(request):
+        filtered_donor_list = []
+        for donor in final_result:
+            filtered_donor_list.append(schema_manager.exclude_properties_from_response(excluded_fields, donor))
+        final_result = filtered_donor_list
+
     return jsonify(final_result)
 
 
@@ -4403,13 +4509,13 @@ def multiple_components(user_token: str, json_data_dict: dict):
 Parameters
 ----------
 id : str
-    The SenNet ID (e.g. SNT123.ABCD.456) or UUID of given entity 
+    The SenNet ID (e.g. SNT123.ABCD.456) or UUID of given entity
 Returns
 -------
 json
     A list of all the collections of the target entity
 """
-@app.route('/entities/<id>/collections', methods = ['GET'])
+@app.route('/entities/<id>/collections', methods=['GET'])
 def get_collections(id):
     final_result = []
 
@@ -4426,13 +4532,15 @@ def get_collections(id):
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
     uuid = entity_dict['uuid']
+    public_entity = True
 
     if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
         abort_bad_req(f"Unsupported entity type of id {id}: {normalized_entity_type}")
 
     if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
         # Token is required and the user must belong to HuBMAP-READ group
-        token = get_user_token(request, non_public_access_required = True)
+        token = get_user_token(request, non_public_access_required=True)
+        public_entity = False
 
     # By now, either the entity is public accessible or the user token has the correct access level
     # Result filtering based on query string
@@ -4478,6 +4586,24 @@ def get_collections(id):
         # Final result after normalization
         final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list)
 
+        filtered_final_result = []
+        for collection in final_result:
+            collection_entity_type = collection.get('entity_type')
+            fields_to_exclude = schema_manager.get_fields_to_exclude(collection_entity_type)
+            if public_entity and not user_in_sennet_read_group(request):
+                filtered_collection = schema_manager.exclude_properties_from_response(fields_to_exclude, collection)
+                # `.get('datasets')` is None when the key is absent, so guard before iterating
+                if filtered_collection.get('datasets'):
+                    filtered_collection['datasets'] = [
+                        schema_manager.exclude_properties_from_response(
+                            schema_manager.get_fields_to_exclude(dataset.get('entity_type')), dataset)
+                        for dataset in filtered_collection['datasets']
+                    ]
+                filtered_final_result.append(filtered_collection)
+            else:
+                filtered_final_result.append(collection)
+        final_result = filtered_final_result
+
     return jsonify(final_result)
 
 
@@ -5408,7 +5534,6 @@ def delete_cache(id):
         schema_manager.delete_memcached_cache(uuids_list)
 
 
-
 """
 Retrieve the JSON containing the normalized metadata information for a given entity appropriate for the
 scope of metadata requested e.g. complete data for a another service, indexing data for an OpenSearch document, etc.
@@ -5420,14 +5545,14 @@ def delete_cache(id):
 metadata_scope:
     A recognized scope from the SchemaConstants, controlling the triggers which are fired and elements
     from Neo4j which are retained.  Default is MetadataScopeEnum.INDEX.
-    
+
 Returns
 -------
 json
     Metadata for the entity appropriate for the metadata_scope argument, and filtered by an additional
     `property` arguments in the HTTP request.
 """
-def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=MetadataScopeEnum.INDEX):
+def _get_metadata_by_id(entity_id: str = None, metadata_scope: MetadataScopeEnum = MetadataScopeEnum.INDEX):
     # Token is not required, but if an invalid token provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
@@ -5440,10 +5565,11 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met
     # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
     entity_dict = query_target_entity(entity_id)
     normalized_entity_type = entity_dict['entity_type']
+    excluded_fields = schema_manager.get_fields_to_exclude(normalized_entity_type)
 
     # Get the entity result of the indexable dictionary from cache if exists, otherwise regenerate and cache
     metadata_dict = schema_manager.get_index_metadata(token, entity_dict) \
-        if metadata_scope==MetadataScopeEnum.INDEX \
+        if metadata_scope == MetadataScopeEnum.INDEX \
         else schema_manager.get_complete_entity_result(token, entity_dict)
 
     # Determine if the entity is publicly visible base on its data, only.
@@ -5451,11 +5577,14 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met
     # are populated as triggered data.  So pull back the complete entity for
     # _get_entity_visibility() to check.
     entity_scope = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=entity_dict)
+    public_entity = False
+    has_access = True
 
     # Initialize the user as authorized if the data is public.  Otherwise, the
     # user is not authorized and credentials must be checked.
     if entity_scope == DataVisibilityEnum.PUBLIC:
         user_authorized = True
+        public_entity = True
     else:
         # It's highly possible that there's no token provided
         user_token = get_user_token(request)
@@ -5470,13 +5599,19 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met
             # Or the token is valid but doesn't contain group information (auth token or transfer token)
             user_authorized = user_in_sennet_read_group(request)
 
+    user_token = get_user_token(request)
+    if isinstance(user_token, Response):
+        has_access = False
+    if not user_in_sennet_read_group(request):
+        has_access = False
+
     # We'll need to return all the properties including those generated by
     # `on_read_trigger` to have a complete result e.g., the 'next_revision_uuid' and
     # 'previous_revision_uuid' being used below.
     # Collections, however, will filter out only public properties for return.
     if not user_authorized:
-        abort_forbidden(f"The requested {normalized_entity_type} has non-public data."
-                        f"  A Globus token with access permission is required.")
+        abort_forbidden(f"The requested {normalized_entity_type} has non-public data. "
+                        "A Globus token with access permission is required.")
 
     # We need to exclude `antibodies` for now as it conflicts with some dynamic templates in the Search API
     # We need to include `protocol_url` as those are needed in the Portal
@@ -5500,7 +5635,7 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met
 
             if property_key == 'status' and \
                     not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
-                abort_bad_req(f"Only Dataset or Publication supports 'status' property key in the query string")
+                abort_bad_req("Only Dataset or Publication supports 'status' property key in the query string")
 
             # Response with the property value directly
             # Don't use jsonify() on string value
@@ -5508,6 +5643,10 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met
         else:
             abort_bad_req("The specified query string is not supported. Use '?property=<key>' to filter the result")
     else:
+        if public_entity and has_access is False:
+            modified_final_result = schema_manager.exclude_properties_from_response(excluded_fields, final_result)
+            return modified_final_result
+
         # Response with the dict
         return final_result
 
@@ -5544,8 +5683,9 @@ def user_in_sennet_read_group(request):
         # We treat such cases as the user not in the HuBMAP-READ group
         return False
 
-
     return (sennet_read_group_uuid in user_info['hmgroupids'])
+
+
 ####################################################################################################
 ## For local development/testing
 ####################################################################################################
diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml
index 8e7db7a..122da0e 100644
--- a/src/schema/provenance_schema.yaml
+++ b/src/schema/provenance_schema.yaml
@@ -351,6 +351,17 @@ ENTITIES:
   Dataset:
     # Only allowed applications can create new Dataset via POST
     before_entity_create_validator: validate_application_header_before_entity_create
+    excluded_properties_from_public_response:
+      - lab_dataset_id
+      - sources:
+          - lab_source_id
+      - ingest_metadata:
+          - metadata:
+              - lab_id
+              - slide_id
+      - cedar_mapped_metadata:
+          - Lab ID
+          - Slide ID
     # Dataset can be either derivation source or target
     derivation:
       source: true
@@ -812,6 +823,9 @@ ENTITIES:
     derivation:
       source: false
       target: true
+    excluded_properties_from_public_response:
+      - lab_source_id
+      - label
     properties:
       <<: *shared_properties
       <<: *shared_entity_properties
@@ -930,6 +944,20 @@ ENTITIES:
     derivation:
       source: true
       target: true
+    excluded_properties_from_public_response:
+      - lab_tissue_sample_id
+      - origin_sample:
+          - lab_tissue_sample_id
+      - origin_samples:
+          - lab_tissue_sample_id
+      - source:
+          - lab_source_id
+      - metadata:
+          - lab_id
+          - slide_id
+      - cedar_mapped_metadata:
+          - Lab ID
+          - Slide ID
     properties:
       <<: *shared_properties
       <<: *shared_entity_properties
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index 5aae28b..9f7c992 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -298,6 +298,77 @@ def get_all_entity_types():
     return list(dict_keys)
 
 
+def get_fields_to_exclude(normalized_class=None):
+    """Return the schema's ``excluded_properties_from_public_response`` entries for an entity type.
+
+    Parameters
+    ----------
+    normalized_class : Optional[str]
+        The normalized entity type (a key under ``ENTITIES`` in the provenance schema yaml)
+
+    Returns
+    -------
+    list
+        A new list of exclusion entries: plain strings name top-level fields, while
+        single-key dicts describe nested fields (see ``exclude_properties_from_response``).
+        Empty when the type is ``None``, unknown, or declares no exclusions.
+    """
+    # `.get()` with defaults: callers pass `entity.get('entity_type')`, which can be None
+    # or a type absent from the schema, and that must not raise KeyError on a public read
+    entity_schema = _schema['ENTITIES'].get(normalized_class) or {}
+    exclude_list = entity_schema.get('excluded_properties_from_public_response') or []
+    # Copy so callers can never mutate the loaded schema
+    return list(exclude_list)
+
+
+def exclude_properties_from_response(excluded_fields, output_dict):
+    """Remove the fields named by a schema exclusion list from a response dict, in place.
+
+    Parameters
+    ----------
+    excluded_fields : list
+        Exclusion entries as returned by ``get_fields_to_exclude``. A string names a key to
+        drop; a dict maps a key to the entries (string, dict, or list of either) to drop
+        beneath that key, e.g. ``{'metadata': ['lab_id']}`` drops ``metadata.lab_id``.
+    output_dict : dict
+        The entity data to filter. It is mutated; pass a copy to keep the original.
+
+    Returns
+    -------
+    dict
+        The same ``output_dict`` object, with the excluded fields removed
+    """
+    def delete_nested_field(data, nested_path):
+        # A list value: apply the same exclusion to every element
+        if isinstance(data, list):
+            for item in data:
+                delete_nested_field(item, nested_path)
+            return
+
+        # None, strings and other scalars have no fields to remove. Skipping them avoids
+        # a TypeError from `key in None` and accidental substring matches on str.
+        if not isinstance(data, dict):
+            return
+
+        if not isinstance(nested_path, dict):
+            # Plain field name: always drop the key itself, whatever type its value has
+            data.pop(nested_path, None)
+            return
+
+        for key, nested_fields in nested_path.items():
+            if key not in data:
+                continue
+            if not isinstance(nested_fields, list):
+                nested_fields = [nested_fields]
+            for nested_field in nested_fields:
+                delete_nested_field(data[key], nested_field)
+
+    for field in excluded_fields:
+        delete_nested_field(output_dict, field)
+
+    return output_dict
+
+
 """
 Generating triggered data based on the target events and methods
 
@@ -2045,6 +2116,12 @@ def generate_activity_data(normalized_entity_type, user_token, user_info_dict, c
     return generated_activity_data_dict
 
 
+
+
+
+
+
+
 """
 Get the ingest-api URL to be used by trigger methods
 

From 5def30ae4513df2586b751eb9c67dd3d8a3788ac Mon Sep 17 00:00:00 2001
From: Tyler Madonna <tjm159@pitt.edu>
Date: Wed, 9 Oct 2024 13:36:29 -0400
Subject: [PATCH 2/5] Adding data access level check to descendants

---
 src/app.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/app.py b/src/app.py
index 59b3c0b..f66cfd5 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1688,16 +1688,31 @@ def get_ancestors(id):
 def get_descendants(id):
     final_result = []
 
-    # Get user token from Authorization header
-    user_token = get_user_token(request)
+    # Use the internal token to query the target entity
+    # since public entities don't require user token
+    token = get_internal_token()
 
     # Make sure the id exists in uuid-api and
     # the corresponding entity also exists in neo4j
     entity_dict = query_target_entity(id)
+    normalized_entity_type = entity_dict['entity_type']
     uuid = entity_dict['uuid']
 
-    # Collection and Upload don't have descendants via Activity nodes
-    # No need to check, it'll always return empty list
+    if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
+        # Only published/public datasets don't require token
+        if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
+            # Token is required and the user must belong to SenNet-READ group
+            token = get_user_token(request, non_public_access_required=True)
+    elif normalized_entity_type == 'Sample' or normalized_entity_type == 'Source':
+        # The `data_access_level` of Sample/Source can only be either 'public' or 'consortium'
+        if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
+            token = get_user_token(request, non_public_access_required=True)
+    elif normalized_entity_type == 'Upload':
+        # Uploads are always consortium level
+        token = get_user_token(request, non_public_access_required=True)
+        return jsonify(final_result)
+    else:
+        return jsonify(final_result)
 
     # Result filtering based on query string
     if bool(request.args):
@@ -1737,7 +1752,7 @@ def get_descendants(id):
             'previous_revision_uuids'
         ]
 
-        complete_entities_list = schema_manager.get_complete_entities_list(user_token, descendants_list, properties_to_skip)
+        complete_entities_list = schema_manager.get_complete_entities_list(token, descendants_list, properties_to_skip)
 
         # Final result after normalization
         final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list, properties_to_include=['protocol_url'])
@@ -3947,7 +3962,7 @@ def validate_constraints(entry_json: list):
 str
     The token string if valid
 """
-def get_user_token(request, non_public_access_required = False):
+def get_user_token(request, non_public_access_required=False):
     # Get user token from Authorization header
     # getAuthorizationTokens() also handles MAuthorization header but we are not using that here
     try:

From 990b06ad8e4ba01c34deb4e2f6eb46595403dfc7 Mon Sep 17 00:00:00 2001
From: Tyler Madonna <tjm159@pitt.edu>
Date: Wed, 9 Oct 2024 13:44:48 -0400
Subject: [PATCH 3/5] Adding public excluded properties to descendants

---
 src/app.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/app.py b/src/app.py
index f66cfd5..15b2da6 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1697,16 +1697,19 @@ def get_descendants(id):
     entity_dict = query_target_entity(id)
     normalized_entity_type = entity_dict['entity_type']
     uuid = entity_dict['uuid']
+    public_entity = True
 
     if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
         # Only published/public datasets don't require token
         if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
             # Token is required and the user must belong to SenNet-READ group
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     elif normalized_entity_type == 'Sample' or normalized_entity_type == 'Source':
         # The `data_access_level` of Sample/Source can only be either 'public' or 'consortium'
         if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM:
             token = get_user_token(request, non_public_access_required=True)
+            public_entity = False
     elif normalized_entity_type == 'Upload':
         # Uploads are always consortium level
         token = get_user_token(request, non_public_access_required=True)
@@ -1757,6 +1760,16 @@ def get_descendants(id):
         # Final result after normalization
         final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list, properties_to_include=['protocol_url'])
 
+        if public_entity and not user_in_sennet_read_group(request):
+            filtered_final_result = []
+            for ancestor in final_result:
+                ancestor_entity_type = ancestor.get('entity_type')
+                fields_to_exclude = schema_manager.get_fields_to_exclude(ancestor_entity_type)
+                filtered_ancestor = schema_manager.exclude_properties_from_response(fields_to_exclude, ancestor)
+                filtered_final_result.append(filtered_ancestor)
+
+            final_result = filtered_final_result
+
     return jsonify(final_result)
 
 

From 58f888146738b0cb62efb266bcfa6c3582e97ed4 Mon Sep 17 00:00:00 2001
From: Tyler Madonna <tjm159@pitt.edu>
Date: Wed, 9 Oct 2024 14:03:04 -0400
Subject: [PATCH 4/5] Removing lab_dataset_id from test responses

---
 test/data/get_ancestors_success_dataset.json    | 4 ----
 test/data/get_descendants_success_dataset.json  | 2 --
 test/data/get_entity_by_id_success_dataset.json | 1 -
 3 files changed, 7 deletions(-)

diff --git a/test/data/get_ancestors_success_dataset.json b/test/data/get_ancestors_success_dataset.json
index bfe7718..3c74b31 100644
--- a/test/data/get_ancestors_success_dataset.json
+++ b/test/data/get_ancestors_success_dataset.json
@@ -55,7 +55,6 @@
       "uuid": "5fa78f4fa272db58a7fcc7590376f5e7",
       "source": {
         "metadata": {},
-        "lab_source_id": "Human Source 1",
         "group_name": "CODCC Testing Group",
         "sennet_id": "SNT522.GDLF.724",
         "last_modified_timestamp": 1681844922032,
@@ -83,7 +82,6 @@
       "entity_type": "Sample",
       "group_name": "CODCC Testing Group",
       "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
-      "lab_tissue_sample_id": "Human Blood",
       "last_modified_timestamp": 1681828779121,
       "last_modified_user_displayname": "Test User",
       "last_modified_user_email": "TESTUSER@example.com",
@@ -94,7 +92,6 @@
       "uuid": "cf3d0408de9afd703c8bd71808176b38",
       "source": {
         "metadata": {},
-        "lab_source_id": "Human Source 1",
         "group_name": "CODCC Testing Group",
         "sennet_id": "SNT522.GDLF.724",
         "last_modified_timestamp": 1681844922032,
@@ -123,7 +120,6 @@
       "entity_type": "Source",
       "group_name": "CODCC Testing Group",
       "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
-      "lab_source_id": "Human Source 1",
       "last_modified_timestamp": 1681844922032,
       "last_modified_user_displayname": "Test User",
       "last_modified_user_email": "TESTUSER@example.com",
diff --git a/test/data/get_descendants_success_dataset.json b/test/data/get_descendants_success_dataset.json
index f7cefdc..564dad7 100644
--- a/test/data/get_descendants_success_dataset.json
+++ b/test/data/get_descendants_success_dataset.json
@@ -13,7 +13,6 @@
       "entity_type": "Dataset",
       "group_name": "CODCC Testing Group",
       "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
-      "lab_dataset_id": "Vitessce example - secondary dataset",
       "last_modified_timestamp": 1681841991272,
       "last_modified_user_displayname": "Test User",
       "last_modified_user_email": "TESTUSER@example.com",
@@ -26,7 +25,6 @@
       "sources": [
         {
           "metadata": {},
-          "lab_source_id": "Human Source 1",
           "group_name": "CODCC Testing Group",
           "sennet_id": "SNT522.GDLF.724",
           "last_modified_timestamp": 1681844922032,
diff --git a/test/data/get_entity_by_id_success_dataset.json b/test/data/get_entity_by_id_success_dataset.json
index 53c80ab..75c7a2e 100644
--- a/test/data/get_entity_by_id_success_dataset.json
+++ b/test/data/get_entity_by_id_success_dataset.json
@@ -49,7 +49,6 @@
     "entity_type": "Dataset",
     "group_name": "University of Pittsburgh TMC",
     "group_uuid": "28db7a2b-ed8a-11ec-8b0a-9fe9b51132b1",
-    "lab_dataset_id": "897-Dataset",
     "last_modified_timestamp": 1683227917901,
     "last_modified_user_displayname": "Test User",
     "last_modified_user_email": "TESTUSER@example.com",

From 2de034f46dbcf98989f1cb0f204625d936706ed4 Mon Sep 17 00:00:00 2001
From: Tyler Madonna <tjm159@pitt.edu>
Date: Wed, 9 Oct 2024 16:52:45 -0400
Subject: [PATCH 5/5] Restricting ancestors and descendants responses

---
 src/app.py               | 16 +++++++++++-----
 src/app_neo4j_queries.py | 24 ++++++++++++++++++------
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/src/app.py b/src/app.py
index 15b2da6..bf99e88 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1610,6 +1610,9 @@ def get_ancestors(id):
         # So no need to execute the code below
         return jsonify(final_result)
 
+    authorized = user_in_sennet_read_group(request)
+    data_access_level = 'public' if authorized is False else None
+
     # By now, either the entity is public accessible or the user token has the correct access level
     # Result filtering based on query string
     if bool(request.args):
@@ -1623,7 +1626,7 @@ def get_ancestors(id):
                 abort_bad_req(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
 
             # Only return a list of the filtered property value of each entity
-            property_list = app_neo4j_queries.get_ancestors(neo4j_driver_instance, uuid, property_key)
+            property_list = app_neo4j_queries.get_ancestors(neo4j_driver_instance, uuid, data_access_level, property_key)
 
             # Final result
             final_result = property_list
@@ -1631,7 +1634,7 @@ def get_ancestors(id):
             abort_bad_req("The specified query string is not supported. Use '?property=<key>' to filter the result")
     # Return all the details if no property filtering
     else:
-        ancestors_list = app_neo4j_queries.get_ancestors(neo4j_driver_instance, uuid)
+        ancestors_list = app_neo4j_queries.get_ancestors(neo4j_driver_instance, uuid, data_access_level)
 
         # Generate trigger data
         # Skip some of the properties that are time-consuming to generate via triggers
@@ -1717,6 +1720,9 @@ def get_descendants(id):
     else:
         return jsonify(final_result)
 
+    authorized = user_in_sennet_read_group(request)
+    data_access_level = 'public' if authorized is False else None
+
     # Result filtering based on query string
     if bool(request.args):
         property_key = request.args.get('property')
@@ -1729,7 +1735,7 @@ def get_descendants(id):
                 abort_bad_req(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
 
             # Only return a list of the filtered property value of each entity
-            property_list = app_neo4j_queries.get_descendants(neo4j_driver_instance, uuid, property_key)
+            property_list = app_neo4j_queries.get_descendants(neo4j_driver_instance, uuid, data_access_level, property_key)
 
             # Final result
             final_result = property_list
@@ -1737,7 +1743,7 @@ def get_descendants(id):
             abort_bad_req("The specified query string is not supported. Use '?property=<key>' to filter the result")
     # Return all the details if no property filtering
     else:
-        descendants_list = app_neo4j_queries.get_descendants(neo4j_driver_instance, uuid)
+        descendants_list = app_neo4j_queries.get_descendants(neo4j_driver_instance, uuid, data_access_level)
 
         # Generate trigger data and merge into a big dict
         # and skip some of the properties that are time-consuming to generate via triggers
@@ -1760,7 +1766,7 @@ def get_descendants(id):
         # Final result after normalization
         final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list, properties_to_include=['protocol_url'])
 
-        if public_entity and not user_in_sennet_read_group(request):
+        if public_entity and not authorized:
             filtered_final_result = []
             for ancestor in final_result:
                 ancestor_entity_type = ancestor.get('entity_type')
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index ddafcb9..ee3458a 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -558,7 +558,7 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid):
         raise TransactionError(msg)
 
 
-def get_ancestors(neo4j_driver, uuid, property_key=None):
+def get_ancestors(neo4j_driver, uuid, data_access_level=None, property_key=None):
     """Get all ancestors by uuid.
 
     Parameters
@@ -567,6 +567,8 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
         The neo4j database connection pool
     uuid : str
         The uuid of target entity
+    data_access_level : Optional[str]
+        The data access level of the ancestor entities (public or consortium). None returns all ancestors.
     property_key : str
         A target property key for result filtering
 
@@ -577,17 +579,21 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
     """
     results = []
 
+    predicate = ''
+    if data_access_level:
+        predicate = f"AND ancestor.data_access_level = '{data_access_level}' "
+
     if property_key:
         query = (f"MATCH (e:Entity)-[:USED|WAS_GENERATED_BY*]->(ancestor:Entity) "
                  # Filter out the Lab entities
-                 f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
+                 f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' {predicate}"
                  # COLLECT() returns a list
                  # apoc.coll.toSet() reruns a set containing unique nodes
                  f"RETURN apoc.coll.toSet(COLLECT(ancestor.{property_key})) AS {record_field_name}")
     else:
         query = (f"MATCH (e:Entity)-[:USED|WAS_GENERATED_BY*]->(ancestor:Entity) "
                  # Filter out the Lab entities
-                 f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
+                 f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' {predicate}"
                  # COLLECT() returns a list
                  # apoc.coll.toSet() reruns a set containing unique nodes
                  f"RETURN apoc.coll.toSet(COLLECT(ancestor)) AS {record_field_name}")
@@ -614,7 +620,7 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
     return results
 
 
-def get_descendants(neo4j_driver, uuid, property_key=None):
+def get_descendants(neo4j_driver, uuid, data_access_level=None, property_key=None):
     """ Get all descendants by uuid
 
     Parameters
@@ -623,6 +629,8 @@ def get_descendants(neo4j_driver, uuid, property_key=None):
         The neo4j database connection pool
     uuid : str
         The uuid of target entity
+    data_access_level : Optional[str]
+        The data access level of the descendant entities (public or consortium). None returns all descendants.
     property_key : str
         A target property key for result filtering
 
@@ -633,17 +641,21 @@ def get_descendants(neo4j_driver, uuid, property_key=None):
     """
     results = []
 
+    predicate = ''
+    if data_access_level:
+        predicate = f"AND descendant.data_access_level = '{data_access_level}' "
+
     if property_key:
         query = (f"MATCH (e:Entity)<-[:USED|WAS_GENERATED_BY*]-(descendant:Entity) "
                  # The target entity can't be a Lab
-                 f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' "
+                 f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' {predicate}"
                  # COLLECT() returns a list
                  # apoc.coll.toSet() reruns a set containing unique nodes
                  f"RETURN apoc.coll.toSet(COLLECT(descendant.{property_key})) AS {record_field_name}")
     else:
         query = (f"MATCH (e:Entity)<-[:USED|WAS_GENERATED_BY*]-(descendant:Entity) "
                  # The target entity can't be a Lab
-                 f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' "
+                 f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' {predicate}"
                  # COLLECT() returns a list
                  # apoc.coll.toSet() reruns a set containing unique nodes
                  f"RETURN apoc.coll.toSet(COLLECT(descendant)) AS {record_field_name}")