From d2d0134844badf45fa2081ac124654c9e021a53b Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Wed, 9 Oct 2024 11:08:51 -0400 Subject: [PATCH 1/5] Adding excluded properties from public response --- src/app.py | 168 +++++++++++++++++++++++++++--- src/schema/provenance_schema.yaml | 28 +++++ src/schema/schema_manager.py | 77 ++++++++++++++ 3 files changed, 259 insertions(+), 14 deletions(-) diff --git a/src/app.py b/src/app.py index d273b6a..59b3c0b 100644 --- a/src/app.py +++ b/src/app.py @@ -446,18 +446,22 @@ def get_ancestor_organs(id): # since public entities don't require user token token = get_internal_token() + public_entity = True if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): # Only published/public datasets don't require token if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) + public_entity = False else: # The `data_access_level` of Sample can only be either 'public' or 'consortium' if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM: token = get_user_token(request, non_public_access_required=True) + public_entity = False # By now, either the entity is public accessible or the user token has the correct access level organs = app_neo4j_queries.get_ancestor_organs(neo4j_driver_instance, entity_dict['uuid']) + excluded_fields = schema_manager.get_fields_to_exclude('Sample') # Skip executing the trigger method to get Sample.direct_ancestor properties_to_skip = ['direct_ancestor'] @@ -466,6 +470,12 @@ def get_ancestor_organs(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + if public_entity and not user_in_sennet_read_group(request): + filtered_organs_list = [] + for organ in final_result: + filtered_organs_list.append(schema_manager.exclude_properties_from_response(excluded_fields, organ)) + 
final_result = filtered_organs_list + return jsonify(final_result) @@ -540,6 +550,7 @@ def get_entity_by_id(id): # Query target entity against uuid-api and neo4j and return as a dict if exists entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] + fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type) # Use the internal token to query the target entity # since public entities don't require user token @@ -553,11 +564,13 @@ def get_entity_by_id(id): # Determine if the entity is publicly visible base on its data, only. entity_scope = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=complete_dict) + public_entity = False # Initialize the user as authorized if the data is public. Otherwise, the # user is not authorized and credentials must be checked. if entity_scope == DataVisibilityEnum.PUBLIC: user_authorized = True + public_entity = True else: # It's highly possible that there's no token provided user_token = get_user_token(request) @@ -609,6 +622,8 @@ def get_entity_by_id(id): abort_bad_req("The specified query string is not supported. Use '?property=' to filter the result") else: # Response with the dict + if public_entity and not user_in_sennet_read_group(request): + final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result) return jsonify(final_result) @@ -678,9 +693,8 @@ def get_entities_by_ids_for_dashboard(entity_type: str, json_data_dict: dict): Metadata for the entity appropriate for an OpenSearch document, and filtered by an additional `property` arguments in the HTTP request. 
""" -@app.route('/documents/', methods = ['GET']) +@app.route('/documents/', methods=['GET']) def get_document_by_id(id): - result_dict = _get_metadata_by_id(entity_id=id, metadata_scope=MetadataScopeEnum.INDEX) return jsonify(result_dict) @@ -1573,6 +1587,7 @@ def get_ancestors(id): entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] uuid = entity_dict['uuid'] + public_entity = True # Collection doesn't have ancestors via Activity nodes if normalized_entity_type == 'Collection': @@ -1583,10 +1598,12 @@ def get_ancestors(id): if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) + public_entity = False elif normalized_entity_type == 'Sample': # The `data_access_level` of Sample can only be either 'public' or 'consortium' if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM: token = get_user_token(request, non_public_access_required=True) + public_entity = False else: # Source and Upload will always get back an empty list # becuase their direct ancestor is Lab, which is being skipped by Neo4j query @@ -1639,6 +1656,16 @@ def get_ancestors(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list, properties_to_include=['protocol_url']) + if public_entity and not user_in_sennet_read_group(request): + filtered_final_result = [] + for ancestor in final_result: + ancestor_entity_type = ancestor.get('entity_type') + fields_to_exclude = schema_manager.get_fields_to_exclude(ancestor_entity_type) + filtered_ancestor = schema_manager.exclude_properties_from_response(fields_to_exclude, ancestor) + filtered_final_result.append(filtered_ancestor) + + final_result = filtered_final_result + return jsonify(final_result) @@ -1753,6 +1780,7 @@ def get_parents(id): entity_dict = query_target_entity(id) normalized_entity_type = 
entity_dict['entity_type'] uuid = entity_dict['uuid'] + public_entity = True # Collection doesn't have ancestors via Activity nodes if normalized_entity_type == 'Collection': @@ -1763,10 +1791,12 @@ def get_parents(id): if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) + public_entity = False elif normalized_entity_type == 'Sample': # The `data_access_level` of Sample can only be either 'public' or 'consortium' if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM: token = get_user_token(request, non_public_access_required=True) + public_entity = False else: # Source and Upload will always get back an empty list # becuase their direct ancestor is Lab, which is being skipped by Neo4j query @@ -1819,6 +1849,17 @@ def get_parents(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + filtered_final_result = [] + for parent in final_result: + parent_entity_type = parent.get('entity_type') + fields_to_exclude = schema_manager.get_fields_to_exclude(parent_entity_type) + if public_entity and not user_in_sennet_read_group(request): + filtered_parent = schema_manager.exclude_properties_from_response(fields_to_exclude, parent) + filtered_final_result.append(filtered_parent) + else: + filtered_final_result.append(parent) + final_result = filtered_final_result + return jsonify(final_result) @@ -1933,6 +1974,7 @@ def get_siblings(id): entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] uuid = entity_dict['uuid'] + public_entity = True # Collection doesn't have ancestors via Activity nodes if normalized_entity_type == 'Collection': @@ -1943,10 +1985,12 @@ def get_siblings(id): if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = 
get_user_token(request, non_public_access_required=True) + public_entity = False elif normalized_entity_type == 'Sample': # The `data_access_level` of Sample can only be either 'public' or 'consortium' if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM: token = get_user_token(request, non_public_access_required=True) + public_entity = False else: # Source and Upload will always get back an empty list # becuase their direct ancestor is Lab, which is being skipped by Neo4j query @@ -2011,6 +2055,17 @@ def get_siblings(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + filtered_final_result = [] + for sibling in final_result: + sibling_entity_type = sibling.get('entity_type') + fields_to_exclude = schema_manager.get_fields_to_exclude(sibling_entity_type) + if public_entity and not user_in_sennet_read_group(request): + filtered_sibling = schema_manager.exclude_properties_from_response(fields_to_exclude, sibling) + filtered_final_result.append(filtered_sibling) + else: + filtered_final_result.append(sibling) + final_result = filtered_final_result + return jsonify(final_result) @@ -2049,6 +2104,7 @@ def get_tuplets(id): entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] uuid = entity_dict['uuid'] + public_entity = True # Collection doesn't have ancestors via Activity nodes if normalized_entity_type == 'Collection': @@ -2059,10 +2115,12 @@ def get_tuplets(id): if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) + public_entity = False elif normalized_entity_type == 'Sample': # The `data_access_level` of Sample can only be either 'public' or 'consortium' if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM: token = get_user_token(request, non_public_access_required=True) + public_entity = False 
else: # Source and Upload will always get back an empty list # becuase their direct ancestor is Lab, which is being skipped by Neo4j query @@ -2117,6 +2175,17 @@ def get_tuplets(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + filtered_final_result = [] + for tuplet in final_result: + tuple_entity_type = tuplet.get('entity_type') + fields_to_exclude = schema_manager.get_fields_to_exclude(tuple_entity_type) + if public_entity and not user_in_sennet_read_group(request): + filtered_tuplet = schema_manager.exclude_properties_from_response(fields_to_exclude, tuplet) + filtered_final_result.append(filtered_tuplet) + else: + filtered_final_result.append(tuplet) + final_result = filtered_final_result + return jsonify(final_result) @@ -2500,7 +2569,9 @@ def get_dataset_latest_revision(id): # Query target entity against uuid-api and neo4j and return as a dict if exists entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] + fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type) uuid = entity_dict['uuid'] + public_entity = True # Only for Dataset if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): @@ -2512,7 +2583,7 @@ def get_dataset_latest_revision(id): if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) - + public_entity = False latest_revision_dict = app_neo4j_queries.get_dataset_latest_revision(neo4j_driver_instance, uuid) else: # Default to the latest "public" revision dataset @@ -2539,6 +2610,9 @@ def get_dataset_latest_revision(id): # Also normalize the result based on schema final_result = schema_manager.normalize_object_result_for_response('ENTITIES', complete_dict) + if public_entity and not user_in_sennet_read_group(request): + final_result = 
schema_manager.exclude_properties_from_response(fields_to_exclude, final_result) + # Response with the dict return jsonify(final_result) @@ -2750,10 +2824,13 @@ def get_revisions_list(id): ] complete_revisions_list = schema_manager.get_complete_entities_list(token, sorted_revisions_list, properties_to_skip) normalized_revisions_list = schema_manager.normalize_entities_list_for_response(complete_revisions_list) + fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type) # Only check the very last revision (the first revision dict since normalized_revisions_list is already sorted DESC) # to determine if send it back or not + is_in_read_group = True if not user_in_globus_read_group(request): + is_in_read_group = False latest_revision = normalized_revisions_list[0] if latest_revision['status'].lower() != DATASET_STATUS_PUBLISHED: @@ -2773,6 +2850,8 @@ def get_revisions_list(id): } if show_dataset: result['dataset'] = revision + if not is_in_read_group: + result['dataset'] = schema_manager.exclude_properties_from_response(fields_to_exclude, revision) results.append(result) revision_number -= 1 @@ -2904,11 +2983,14 @@ def get_associated_organs_from_dataset(id): # Use the internal token to query the target entity # since public entities don't require user token token = get_internal_token() + excluded_fields = schema_manager.get_fields_to_exclude('Sample') + public_entity = True # published/public datasets don't require token if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) + public_entity = False # By now, either the entity is public accessible or # the user token has the correct access level @@ -2924,6 +3006,12 @@ def get_associated_organs_from_dataset(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + if public_entity and not 
user_in_sennet_read_group(request): + filtered_organs_list = [] + for organ in final_result: + filtered_organs_list.append(schema_manager.exclude_properties_from_response(excluded_fields, organ)) + final_result = filtered_organs_list + return jsonify(final_result) @@ -2951,6 +3039,7 @@ def get_associated_samples_from_dataset(id): # Query target entity against uuid-api and neo4j and return as a dict if exists entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] + excluded_fields = schema_manager.get_fields_to_exclude('Sample') # Only for Dataset if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): @@ -2959,11 +3048,13 @@ def get_associated_samples_from_dataset(id): # Use the internal token to query the target entity # since public entities don't require user token token = get_internal_token() + public_entity = True # published/public datasets don't require token if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) + public_entity = False # By now, either the entity is public accessible or the user token has the correct access level associated_samples = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance, entity_dict['uuid']) @@ -2978,6 +3069,12 @@ def get_associated_samples_from_dataset(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + if public_entity and not user_in_sennet_read_group(request): + filtered_sample_list = [] + for sample in final_result: + filtered_sample_list.append(schema_manager.exclude_properties_from_response(excluded_fields, sample)) + final_result = filtered_sample_list + return jsonify(final_result) @@ -3005,6 +3102,7 @@ def get_associated_sources_from_dataset(id): # Query target entity against uuid-api and neo4j and return as a dict if 
exists entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] + excluded_fields = schema_manager.get_fields_to_exclude('Source') # Only for Dataset if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): @@ -3013,11 +3111,13 @@ def get_associated_sources_from_dataset(id): # Use the internal token to query the target entity # since public entities don't require user token token = get_internal_token() + public_entity = True # published/public datasets don't require token if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) + public_entity = False # By now, either the entity is public accessible or the user token has the correct access level associated_sources = app_neo4j_queries.get_associated_sources_from_dataset(neo4j_driver_instance, entity_dict['uuid']) @@ -3032,6 +3132,12 @@ def get_associated_sources_from_dataset(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + if public_entity and not user_in_sennet_read_group(request): + filtered_donor_list = [] + for donor in final_result: + filtered_donor_list.append(schema_manager.exclude_properties_from_response(excluded_fields, donor)) + final_result = filtered_donor_list + return jsonify(final_result) @@ -4403,13 +4509,13 @@ def multiple_components(user_token: str, json_data_dict: dict): Parameters ---------- id : str - The SenNet ID (e.g. SNT123.ABCD.456) or UUID of given entity + The SenNet ID (e.g. 
SNT123.ABCD.456) or UUID of given entity Returns ------- json A list of all the collections of the target entity """ -@app.route('/entities//collections', methods = ['GET']) +@app.route('/entities//collections', methods=['GET']) def get_collections(id): final_result = [] @@ -4426,13 +4532,15 @@ def get_collections(id): entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] uuid = entity_dict['uuid'] + public_entity = True if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): abort_bad_req(f"Unsupported entity type of id {id}: {normalized_entity_type}") if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to HuBMAP-READ group - token = get_user_token(request, non_public_access_required = True) + token = get_user_token(request, non_public_access_required=True) + public_entity = False # By now, either the entity is public accessible or the user token has the correct access level # Result filtering based on query string @@ -4478,6 +4586,24 @@ def get_collections(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) + filtered_final_result = [] + for collection in final_result: + collection_entity_type = collection.get('entity_type') + fields_to_exclude = schema_manager.get_fields_to_exclude(collection_entity_type) + if public_entity and not user_in_sennet_read_group(request): + filtered_collection = schema_manager.exclude_properties_from_response(fields_to_exclude, collection) + datasets = filtered_collection.get('datasets') + filtered_datasets = [] + for dataset in datasets: + dataset_fields_to_exclude = schema_manager.get_fields_to_exclude(dataset.get('entity_type')) + filtered_dataset = schema_manager.exclude_properties_from_response(dataset_fields_to_exclude, dataset) + filtered_datasets.append(filtered_dataset) + filtered_collection['datasets'] = filtered_datasets + 
filtered_final_result.append(filtered_collection) + else: + filtered_final_result.append(collection) + final_result = filtered_final_result + return jsonify(final_result) @@ -5408,7 +5534,6 @@ def delete_cache(id): schema_manager.delete_memcached_cache(uuids_list) - """ Retrieve the JSON containing the normalized metadata information for a given entity appropriate for the scope of metadata requested e.g. complete data for a another service, indexing data for an OpenSearch document, etc. @@ -5420,14 +5545,14 @@ def delete_cache(id): metadata_scope: A recognized scope from the SchemaConstants, controlling the triggers which are fired and elements from Neo4j which are retained. Default is MetadataScopeEnum.INDEX. - + Returns ------- json Metadata for the entity appropriate for the metadata_scope argument, and filtered by an additional `property` arguments in the HTTP request. """ -def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=MetadataScopeEnum.INDEX): +def _get_metadata_by_id(entity_id: str = None, metadata_scope: MetadataScopeEnum = MetadataScopeEnum.INDEX): # Token is not required, but if an invalid token provided, # we need to tell the client with a 401 error validate_token_if_auth_header_exists(request) @@ -5440,10 +5565,11 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists entity_dict = query_target_entity(entity_id) normalized_entity_type = entity_dict['entity_type'] + excluded_fields = schema_manager.get_fields_to_exclude(normalized_entity_type) # Get the entity result of the indexable dictionary from cache if exists, otherwise regenerate and cache metadata_dict = schema_manager.get_index_metadata(token, entity_dict) \ - if metadata_scope==MetadataScopeEnum.INDEX \ + if metadata_scope == MetadataScopeEnum.INDEX \ else schema_manager.get_complete_entity_result(token, entity_dict) # Determine if the entity is 
publicly visible base on its data, only. @@ -5451,11 +5577,14 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met # are populated as triggered data. So pull back the complete entity for # _get_entity_visibility() to check. entity_scope = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=entity_dict) + public_entity = False + has_access = True # Initialize the user as authorized if the data is public. Otherwise, the # user is not authorized and credentials must be checked. if entity_scope == DataVisibilityEnum.PUBLIC: user_authorized = True + public_entity = True else: # It's highly possible that there's no token provided user_token = get_user_token(request) @@ -5470,13 +5599,19 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met # Or the token is valid but doesn't contain group information (auth token or transfer token) user_authorized = user_in_sennet_read_group(request) + user_token = get_user_token(request) + if isinstance(user_token, Response): + has_access = False + if not user_in_sennet_read_group(request): + has_access = False + # We'll need to return all the properties including those generated by # `on_read_trigger` to have a complete result e.g., the 'next_revision_uuid' and # 'previous_revision_uuid' being used below. # Collections, however, will filter out only public properties for return. if not user_authorized: - abort_forbidden(f"The requested {normalized_entity_type} has non-public data." - f" A Globus token with access permission is required.") + abort_forbidden(f"The requested {normalized_entity_type} has non-public data. 
" + "A Globus token with access permission is required.") # We need to exclude `antibodies` for now as it conflicts with some dynamic templates in the Search API # We need to include `protocol_url` as those are needed in the Portal @@ -5500,7 +5635,7 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met if property_key == 'status' and \ not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): - abort_bad_req(f"Only Dataset or Publication supports 'status' property key in the query string") + abort_bad_req("Only Dataset or Publication supports 'status' property key in the query string") # Response with the property value directly # Don't use jsonify() on string value @@ -5508,6 +5643,10 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met else: abort_bad_req("The specified query string is not supported. Use '?property=' to filter the result") else: + if public_entity and has_access is False: + modified_final_result = schema_manager.exclude_properties_from_response(excluded_fields, final_result) + return modified_final_result + # Response with the dict return final_result @@ -5544,8 +5683,9 @@ def user_in_sennet_read_group(request): # We treat such cases as the user not in the HuBMAP-READ group return False - return (sennet_read_group_uuid in user_info['hmgroupids']) + + #################################################################################################### ## For local development/testing #################################################################################################### diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 8e7db7a..122da0e 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -351,6 +351,17 @@ ENTITIES: Dataset: # Only allowed applications can create new Dataset via POST before_entity_create_validator: validate_application_header_before_entity_create + 
excluded_properties_from_public_response: + - lab_dataset_id + - sources: + - lab_source_id + - ingest_metadata: + - metadata: + - lab_id + - slide_id + - cedar_mapped_metadata: + - Lab ID + - Slide ID # Dataset can be either derivation source or target derivation: source: true @@ -812,6 +823,9 @@ ENTITIES: derivation: source: false target: true + excluded_properties_from_public_response: + - lab_source_id + - label properties: <<: *shared_properties <<: *shared_entity_properties @@ -930,6 +944,20 @@ ENTITIES: derivation: source: true target: true + excluded_properties_from_public_response: + - lab_tissue_sample_id + - origin_sample: + - lab_tissue_sample_id + - origin_samples: + - lab_tissue_sample_id + - source: + - lab_source_id + - metadata: + - lab_id + - slide_id + - cedar_mapped_metadata: + - Lab ID + - Slide ID properties: <<: *shared_properties <<: *shared_entity_properties diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 5aae28b..9f7c992 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -298,6 +298,77 @@ def get_all_entity_types(): return list(dict_keys) +def get_fields_to_exclude(normalized_class=None): + """Retrieves fields designated in the provenance schema yaml under + excluded_properties_from_public_response and returns the fields in a list. 
+ + Parameters + ---------- + normalized_class : Optional[str] + the normalized entity type of the entity whose fields are to be removed + + Returns + ------- + list + A list where each entry is a field name (str) or a dict of nested fields to be excluded + """ + # Determine the schema section based on class + excluded_fields = [] + schema_section = _schema['ENTITIES'] + exclude_list = schema_section.get(normalized_class, {}).get('excluded_properties_from_public_response') + if exclude_list: + excluded_fields.extend(exclude_list) + return excluded_fields + + +def exclude_properties_from_response(excluded_fields, output_dict): + """Removes specified fields from an existing dictionary. + + Parameters + ---------- + excluded_fields : list + A list of the fields to be excluded + output_dict : dictionary + A dictionary representing the data to be modified + + Returns + ------- + dict + The same dictionary, modified in place, with the fields removed + """ + def delete_nested_field(data, nested_path): + if isinstance(nested_path, dict): + for key, value in nested_path.items(): + if isinstance(data, dict) and key in data: + if isinstance(value, list): + for nested_field in value: + if isinstance(nested_field, dict): + delete_nested_field(data[key], nested_field) + + elif isinstance(data[key], list): + for item in data[key]: + if isinstance(item, dict) and nested_field in item: + del item[nested_field] + + elif isinstance(data[key], dict) and nested_field in data[key]: + del data[key][nested_field] + elif isinstance(value, dict): + delete_nested_field(data[key], value) + + elif nested_path in data: + if isinstance(data[nested_path], list): + for item in data[nested_path]: + if isinstance(item, dict) and nested_path in item: + del item[nested_path] + else: + del data[nested_path] + + for field in excluded_fields: + delete_nested_field(output_dict, field) + + return output_dict + + """ Generating triggered data based on the target events and methods @@ -2045,6 +2116,12 @@ def generate_activity_data(normalized_entity_type, user_token, user_info_dict, c return generated_activity_data_dict + + + + + + """ Get the ingest-api URL to be used by 
trigger methods From 5def30ae4513df2586b751eb9c67dd3d8a3788ac Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Wed, 9 Oct 2024 13:36:29 -0400 Subject: [PATCH 2/5] Adding data access level check to descendents --- src/app.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/app.py b/src/app.py index 59b3c0b..f66cfd5 100644 --- a/src/app.py +++ b/src/app.py @@ -1688,16 +1688,31 @@ def get_ancestors(id): def get_descendants(id): final_result = [] - # Get user token from Authorization header - user_token = get_user_token(request) + # Use the internal token to query the target entity + # since public entities don't require user token + token = get_internal_token() # Make sure the id exists in uuid-api and # the corresponding entity also exists in neo4j entity_dict = query_target_entity(id) + normalized_entity_type = entity_dict['entity_type'] uuid = entity_dict['uuid'] - # Collection and Upload don't have descendants via Activity nodes - # No need to check, it'll always return empty list + if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): + # Only published/public datasets don't require token + if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + # Token is required and the user must belong to SenNet-READ group + token = get_user_token(request, non_public_access_required=True) + elif normalized_entity_type == 'Sample' or normalized_entity_type == 'Source': + # The `data_access_level` of Sample/Source can only be either 'public' or 'consortium' + if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM: + token = get_user_token(request, non_public_access_required=True) + elif normalized_entity_type == 'Upload': + # Uploads are always consortium level + token = get_user_token(request, non_public_access_required=True) + return jsonify(final_result) + else: + return jsonify(final_result) # Result filtering based on query string if bool(request.args): @@ -1737,7 +1752,7 @@ def 
get_descendants(id): 'previous_revision_uuids' ] - complete_entities_list = schema_manager.get_complete_entities_list(user_token, descendants_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(token, descendants_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list, properties_to_include=['protocol_url']) @@ -3947,7 +3962,7 @@ def validate_constraints(entry_json: list): str The token string if valid """ -def get_user_token(request, non_public_access_required = False): +def get_user_token(request, non_public_access_required=False): # Get user token from Authorization header # getAuthorizationTokens() also handles MAuthorization header but we are not using that here try: From 990b06ad8e4ba01c34deb4e2f6eb46595403dfc7 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Wed, 9 Oct 2024 13:44:48 -0400 Subject: [PATCH 3/5] Adding public excluded properties to descendents --- src/app.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/app.py b/src/app.py index f66cfd5..15b2da6 100644 --- a/src/app.py +++ b/src/app.py @@ -1697,16 +1697,19 @@ def get_descendants(id): entity_dict = query_target_entity(id) normalized_entity_type = entity_dict['entity_type'] uuid = entity_dict['uuid'] + public_entity = True if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): # Only published/public datasets don't require token if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: # Token is required and the user must belong to SenNet-READ group token = get_user_token(request, non_public_access_required=True) + public_entity = False elif normalized_entity_type == 'Sample' or normalized_entity_type == 'Source': # The `data_access_level` of Sample/Source can only be either 'public' or 'consortium' if entity_dict['data_access_level'] == ACCESS_LEVEL_CONSORTIUM: token = get_user_token(request, 
non_public_access_required=True) + public_entity = False elif normalized_entity_type == 'Upload': # Uploads are always consortium level token = get_user_token(request, non_public_access_required=True) @@ -1757,6 +1760,16 @@ def get_descendants(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list, properties_to_include=['protocol_url']) + if public_entity and not user_in_sennet_read_group(request): + filtered_final_result = [] + for ancestor in final_result: + ancestor_entity_type = ancestor.get('entity_type') + fields_to_exclude = schema_manager.get_fields_to_exclude(ancestor_entity_type) + filtered_ancestor = schema_manager.exclude_properties_from_response(fields_to_exclude, ancestor) + filtered_final_result.append(filtered_ancestor) + + final_result = filtered_final_result + return jsonify(final_result) From 58f888146738b0cb62efb266bcfa6c3582e97ed4 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Wed, 9 Oct 2024 14:03:04 -0400 Subject: [PATCH 4/5] Removing lab_dataset_id from test responses --- test/data/get_ancestors_success_dataset.json | 4 ---- test/data/get_descendants_success_dataset.json | 2 -- test/data/get_entity_by_id_success_dataset.json | 1 - 3 files changed, 7 deletions(-) diff --git a/test/data/get_ancestors_success_dataset.json b/test/data/get_ancestors_success_dataset.json index bfe7718..3c74b31 100644 --- a/test/data/get_ancestors_success_dataset.json +++ b/test/data/get_ancestors_success_dataset.json @@ -55,7 +55,6 @@ "uuid": "5fa78f4fa272db58a7fcc7590376f5e7", "source": { "metadata": {}, - "lab_source_id": "Human Source 1", "group_name": "CODCC Testing Group", "sennet_id": "SNT522.GDLF.724", "last_modified_timestamp": 1681844922032, @@ -83,7 +82,6 @@ "entity_type": "Sample", "group_name": "CODCC Testing Group", "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504", - "lab_tissue_sample_id": "Human Blood", "last_modified_timestamp": 1681828779121, 
"last_modified_user_displayname": "Test User", "last_modified_user_email": "TESTUSER@example.com", @@ -94,7 +92,6 @@ "uuid": "cf3d0408de9afd703c8bd71808176b38", "source": { "metadata": {}, - "lab_source_id": "Human Source 1", "group_name": "CODCC Testing Group", "sennet_id": "SNT522.GDLF.724", "last_modified_timestamp": 1681844922032, @@ -123,7 +120,6 @@ "entity_type": "Source", "group_name": "CODCC Testing Group", "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504", - "lab_source_id": "Human Source 1", "last_modified_timestamp": 1681844922032, "last_modified_user_displayname": "Test User", "last_modified_user_email": "TESTUSER@example.com", diff --git a/test/data/get_descendants_success_dataset.json b/test/data/get_descendants_success_dataset.json index f7cefdc..564dad7 100644 --- a/test/data/get_descendants_success_dataset.json +++ b/test/data/get_descendants_success_dataset.json @@ -13,7 +13,6 @@ "entity_type": "Dataset", "group_name": "CODCC Testing Group", "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504", - "lab_dataset_id": "Vitessce example - secondary dataset", "last_modified_timestamp": 1681841991272, "last_modified_user_displayname": "Test User", "last_modified_user_email": "TESTUSER@example.com", @@ -26,7 +25,6 @@ "sources": [ { "metadata": {}, - "lab_source_id": "Human Source 1", "group_name": "CODCC Testing Group", "sennet_id": "SNT522.GDLF.724", "last_modified_timestamp": 1681844922032, diff --git a/test/data/get_entity_by_id_success_dataset.json b/test/data/get_entity_by_id_success_dataset.json index 53c80ab..75c7a2e 100644 --- a/test/data/get_entity_by_id_success_dataset.json +++ b/test/data/get_entity_by_id_success_dataset.json @@ -49,7 +49,6 @@ "entity_type": "Dataset", "group_name": "University of Pittsburgh TMC", "group_uuid": "28db7a2b-ed8a-11ec-8b0a-9fe9b51132b1", - "lab_dataset_id": "897-Dataset", "last_modified_timestamp": 1683227917901, "last_modified_user_displayname": "Test User", "last_modified_user_email": "TESTUSER@example.com", 
From 2de034f46dbcf98989f1cb0f204625d936706ed4 Mon Sep 17 00:00:00 2001 From: Tyler Madonna Date: Wed, 9 Oct 2024 16:52:45 -0400 Subject: [PATCH 5/5] Restricting ancestors and descendants responses --- src/app.py | 16 +++++++++++----- src/app_neo4j_queries.py | 24 ++++++++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/app.py b/src/app.py index 15b2da6..bf99e88 100644 --- a/src/app.py +++ b/src/app.py @@ -1610,6 +1610,9 @@ def get_ancestors(id): # So no need to execute the code below return jsonify(final_result) + authorized = user_in_sennet_read_group(request) + data_access_level = 'public' if authorized is False else None + # By now, either the entity is public accessible or the user token has the correct access level # Result filtering based on query string if bool(request.args): @@ -1623,7 +1626,7 @@ def get_ancestors(id): abort_bad_req(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}") # Only return a list of the filtered property value of each entity - property_list = app_neo4j_queries.get_ancestors(neo4j_driver_instance, uuid, property_key) + property_list = app_neo4j_queries.get_ancestors(neo4j_driver_instance, uuid, data_access_level, property_key) # Final result final_result = property_list @@ -1631,7 +1634,7 @@ def get_ancestors(id): abort_bad_req("The specified query string is not supported. 
Use '?property=' to filter the result") # Return all the details if no property filtering else: - ancestors_list = app_neo4j_queries.get_ancestors(neo4j_driver_instance, uuid) + ancestors_list = app_neo4j_queries.get_ancestors(neo4j_driver_instance, uuid, data_access_level) # Generate trigger data # Skip some of the properties that are time-consuming to generate via triggers @@ -1717,6 +1720,9 @@ def get_descendants(id): else: return jsonify(final_result) + authorized = user_in_sennet_read_group(request) + data_access_level = 'public' if authorized is False else None + # Result filtering based on query string if bool(request.args): property_key = request.args.get('property') @@ -1729,7 +1735,7 @@ def get_descendants(id): abort_bad_req(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}") # Only return a list of the filtered property value of each entity - property_list = app_neo4j_queries.get_descendants(neo4j_driver_instance, uuid, property_key) + property_list = app_neo4j_queries.get_descendants(neo4j_driver_instance, uuid, data_access_level, property_key) # Final result final_result = property_list @@ -1737,7 +1743,7 @@ def get_descendants(id): abort_bad_req("The specified query string is not supported. 
Use '?property=' to filter the result") # Return all the details if no property filtering else: - descendants_list = app_neo4j_queries.get_descendants(neo4j_driver_instance, uuid) + descendants_list = app_neo4j_queries.get_descendants(neo4j_driver_instance, uuid, data_access_level) # Generate trigger data and merge into a big dict # and skip some of the properties that are time-consuming to generate via triggers @@ -1760,7 +1766,7 @@ def get_descendants(id): # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list, properties_to_include=['protocol_url']) - if public_entity and not user_in_sennet_read_group(request): + if public_entity and not authorized: filtered_final_result = [] for ancestor in final_result: ancestor_entity_type = ancestor.get('entity_type') diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index ddafcb9..ee3458a 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -558,7 +558,7 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid): raise TransactionError(msg) -def get_ancestors(neo4j_driver, uuid, property_key=None): +def get_ancestors(neo4j_driver, uuid, data_access_level=None, property_key=None): """Get all ancestors by uuid. Parameters @@ -567,6 +567,8 @@ def get_ancestors(neo4j_driver, uuid, property_key=None): The neo4j database connection pool uuid : str The uuid of target entity + data_access_level : Optional[str] + The data access level of the ancestor entities (public or consortium). None returns all ancestors. 
property_key : str A target property key for result filtering @@ -577,17 +579,21 @@ def get_ancestors(neo4j_driver, uuid, property_key=None): """ results = [] + predicate = '' + if data_access_level: + predicate = f"AND ancestor.data_access_level = '{data_access_level}' " + if property_key: query = (f"MATCH (e:Entity)-[:USED|WAS_GENERATED_BY*]->(ancestor:Entity) " # Filter out the Lab entities - f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' " + f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' {predicate}" # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN apoc.coll.toSet(COLLECT(ancestor.{property_key})) AS {record_field_name}") else: query = (f"MATCH (e:Entity)-[:USED|WAS_GENERATED_BY*]->(ancestor:Entity) " # Filter out the Lab entities - f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' " + f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' {predicate}" # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN apoc.coll.toSet(COLLECT(ancestor)) AS {record_field_name}") @@ -614,7 +620,7 @@ def get_ancestors(neo4j_driver, uuid, property_key=None): return results -def get_descendants(neo4j_driver, uuid, property_key=None): +def get_descendants(neo4j_driver, uuid, data_access_level=None, property_key=None): """ Get all descendants by uuid Parameters @@ -623,6 +629,8 @@ def get_descendants(neo4j_driver, uuid, property_key=None): The neo4j database connection pool uuid : str The uuid of target entity + data_access_level : Optional[str] + The data access level of the descendant entities (public or consortium). None returns all descendants. 
property_key : str A target property key for result filtering @@ -633,17 +641,21 @@ def get_descendants(neo4j_driver, uuid, property_key=None): """ results = [] + predicate = '' + if data_access_level: + predicate = f"AND descendant.data_access_level = '{data_access_level}' " + if property_key: query = (f"MATCH (e:Entity)<-[:USED|WAS_GENERATED_BY*]-(descendant:Entity) " # The target entity can't be a Lab - f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' " + f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' {predicate}" # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN apoc.coll.toSet(COLLECT(descendant.{property_key})) AS {record_field_name}") else: query = (f"MATCH (e:Entity)<-[:USED|WAS_GENERATED_BY*]-(descendant:Entity) " # The target entity can't be a Lab - f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' " + f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' {predicate}" # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN apoc.coll.toSet(COLLECT(descendant)) AS {record_field_name}")