Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tjmadonna/447 exclude fields from public response #495

Merged
222 changes: 198 additions & 24 deletions src/app.py

Large diffs are not rendered by default.

24 changes: 18 additions & 6 deletions src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid):
raise TransactionError(msg)


def get_ancestors(neo4j_driver, uuid, property_key=None):
def get_ancestors(neo4j_driver, uuid, data_access_level=None, property_key=None):
"""Get all ancestors by uuid.

Parameters
Expand All @@ -567,6 +567,8 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
The neo4j database connection pool
uuid : str
The uuid of target entity
data_access_level : Optional[str]
The data access level of the ancestor entities (public or consortium). None returns all ancestors.
property_key : str
A target property key for result filtering

Expand All @@ -577,17 +579,21 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
"""
results = []

predicate = ''
if data_access_level:
predicate = f"AND ancestor.data_access_level = '{data_access_level}' "

if property_key:
query = (f"MATCH (e:Entity)-[:USED|WAS_GENERATED_BY*]->(ancestor:Entity) "
# Filter out the Lab entities
f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' {predicate}"
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(ancestor.{property_key})) AS {record_field_name}")
else:
query = (f"MATCH (e:Entity)-[:USED|WAS_GENERATED_BY*]->(ancestor:Entity) "
# Filter out the Lab entities
f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' "
f"WHERE e.uuid='{uuid}' AND ancestor.entity_type <> 'Lab' {predicate}"
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(ancestor)) AS {record_field_name}")
Expand All @@ -614,7 +620,7 @@ def get_ancestors(neo4j_driver, uuid, property_key=None):
return results


def get_descendants(neo4j_driver, uuid, property_key=None):
def get_descendants(neo4j_driver, uuid, data_access_level=None, property_key=None):
""" Get all descendants by uuid

Parameters
Expand All @@ -623,6 +629,8 @@ def get_descendants(neo4j_driver, uuid, property_key=None):
The neo4j database connection pool
uuid : str
The uuid of target entity
data_access_level : Optional[str]
The data access level of the descendant entities (public or consortium). None returns all descendants.
property_key : str
A target property key for result filtering

Expand All @@ -633,17 +641,21 @@ def get_descendants(neo4j_driver, uuid, property_key=None):
"""
results = []

predicate = ''
if data_access_level:
predicate = f"AND descendant.data_access_level = '{data_access_level}' "

if property_key:
query = (f"MATCH (e:Entity)<-[:USED|WAS_GENERATED_BY*]-(descendant:Entity) "
# The target entity can't be a Lab
f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' "
f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' {predicate}"
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(descendant.{property_key})) AS {record_field_name}")
else:
query = (f"MATCH (e:Entity)<-[:USED|WAS_GENERATED_BY*]-(descendant:Entity) "
# The target entity can't be a Lab
f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' "
f"WHERE e.uuid=$uuid AND e.entity_type <> 'Lab' {predicate}"
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(descendant)) AS {record_field_name}")
Expand Down
34 changes: 34 additions & 0 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,12 @@ shared_entity_properties: &shared_entity_properties
before_property_update_validators:
- validate_application_header_before_property_update
description: "The metadata returned from the processing at data submission time."
has_metadata:
type: string
generated: true # Disallow entry from users via POST
immutable: true # Disallow update via PUT
description: "True if the entity has metadata associated with it. Otherwise False."
on_index_trigger: get_has_metadata
was_attributed_to:
type: list
description: "Attribution is the ascribing of an entity to an agent."
Expand Down Expand Up @@ -351,6 +357,17 @@ ENTITIES:
Dataset:
# Only allowed applications can create new Dataset via POST
before_entity_create_validator: validate_application_header_before_entity_create
excluded_properties_from_public_response:
- lab_dataset_id
- sources:
- lab_source_id
- ingest_metadata:
- metadata:
- lab_id
- slide_id
- cedar_mapped_metadata:
- Lab ID
- Slide ID
# Dataset can be either derivation source or target
derivation:
source: true
Expand Down Expand Up @@ -751,6 +768,9 @@ ENTITIES:
derivation:
source: false
target: true
excluded_properties_from_public_response:
- lab_source_id
- label
properties:
<<: *shared_properties
<<: *shared_entity_properties
Expand Down Expand Up @@ -869,6 +889,20 @@ ENTITIES:
derivation:
source: true
target: true
excluded_properties_from_public_response:
- lab_tissue_sample_id
- origin_sample:
- lab_tissue_sample_id
- origin_samples:
- lab_tissue_sample_id
- source:
- lab_source_id
- metadata:
- lab_id
- slide_id
- cedar_mapped_metadata:
- Lab ID
- Slide ID
properties:
<<: *shared_properties
<<: *shared_entity_properties
Expand Down
77 changes: 77 additions & 0 deletions src/schema/schema_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,77 @@ def get_all_entity_types():
return list(dict_keys)


def get_fields_to_exclude(normalized_class=None):
    """Retrieve the fields listed under excluded_properties_from_public_response
    in the provenance schema yaml for the given entity type.

    Parameters
    ----------
    normalized_class : Optional[str]
        The normalized entity type of the entity whose fields are to be removed.
        When None (the default) there is no entity type to look up, so an
        empty list is returned.

    Returns
    -------
    list
        A new list where each entry is a field to be excluded. Entries are
        either plain field names (str) or, for nested fields, dicts mapping a
        field name to a list of nested entries, exactly as written in the yaml.
        The list is empty when the entity type declares no exclusions.

    Raises
    ------
    KeyError
        If normalized_class is not None and is not defined under ENTITIES
        in the schema.
    """
    # The default argument must be usable: indexing the schema with None would
    # raise KeyError, so answer "nothing to exclude" before touching the schema.
    if normalized_class is None:
        return []

    entity_schema = _schema['ENTITIES'][normalized_class]
    exclude_list = entity_schema.get('excluded_properties_from_public_response')

    # Hand back a fresh list so callers can't alter the list held by the schema
    return list(exclude_list) if exclude_list else []


def exclude_properties_from_response(excluded_fields, output_dict):
    """Removes specified fields from an existing dictionary (in place).

    Each entry of excluded_fields is either:
      - a plain field name: that key is removed from the data, whatever the
        type of its value, or
      - a dict mapping a field name to a list of nested entries (same format,
        recursively): the nested entries are removed from inside that field.

    Wherever the data holds a list, the exclusion is applied to every element
    of the list. Values that are neither dicts nor lists (None, strings,
    numbers, ...) have no fields to remove and are left untouched, as are
    fields that are missing from the data.

    Parameters
    ----------
    excluded_fields : list
        A list of the fields to be excluded
    output_dict : dictionary
        A dictionary representing the data to be modified

    Returns
    -------
    dict
        The same object passed in as output_dict, with the fields removed
    """
    def delete_nested_field(data, nested_path):
        # A list of entities/records: apply the same exclusion to each element
        if isinstance(data, list):
            for item in data:
                delete_nested_field(item, nested_path)
            return

        # Scalars and None carry no fields; using `in`/`del` on them would
        # either raise TypeError or perform a substring test on strings
        if not isinstance(data, dict):
            return

        if isinstance(nested_path, dict):
            # {field: nested_entries}: descend into the field when it is present
            for key, nested_entries in nested_path.items():
                if key in data:
                    delete_nested_field(data[key], nested_entries)
        elif isinstance(nested_path, list):
            # Several entries that all apply at the current level
            for entry in nested_path:
                delete_nested_field(data, entry)
        else:
            # Leaf field name: remove the key itself, regardless of value type
            data.pop(nested_path, None)

    for field in excluded_fields:
        delete_nested_field(output_dict, field)

    return output_dict


"""
Generating triggered data based on the target events and methods

Expand Down Expand Up @@ -2045,6 +2116,12 @@ def generate_activity_data(normalized_entity_type, user_token, user_info_dict, c
return generated_activity_data_dict








"""
Get the ingest-api URL to be used by trigger methods

Expand Down
49 changes: 49 additions & 0 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,6 +1295,55 @@ def link_to_previous_revision(property_key, normalized_type, user_token, existin
raise


def get_has_metadata(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
    """Trigger event method that reports whether the entity has metadata.

    Parameters
    ----------
    property_key : str
        The target property key
    normalized_type : str
        One of the types defined in the schema yaml
    user_token: str
        The user's globus nexus token
    existing_data_dict : dict
        A dictionary that contains all existing entity properties
    new_data_dict : dict
        A merged dictionary that contains all possible input data to be used

    Returns
    -------
    Tuple[str, Optional[str]]
        str: The target property key
        Optional[str]: "True" or "False" depending on whether a 'metadata' key
        is present; None when the entity matches none of the checked entity
        types or sample categories

    Raises
    ------
    KeyError
        If 'uuid' is missing from existing_data_dict
    """
    def is_entity_type(expected):
        # Compare against the entity type stored on the existing entity
        return equals(expected, existing_data_dict['entity_type'])

    def is_sample_category(expected):
        # sample_category may be absent, so it is read with .get()
        return equals(expected, existing_data_dict.get('sample_category'))

    if 'uuid' not in existing_data_dict:
        raise KeyError(create_trigger_error_msg(
            "Missing 'uuid' key in 'existing_data_dict' during calling 'get_has_metadata()' trigger method.",
            existing_data_dict, new_data_dict
        ))

    # For datasets the 'metadata' key is looked up inside 'ingest_metadata'
    if is_entity_type(Ontology.ops().entities().DATASET):
        dataset_metadata = existing_data_dict.get('ingest_metadata', {})
        return property_key, str('metadata' in dataset_metadata)

    categories = Ontology.ops().specimen_categories()
    checks_top_level_metadata = (
        is_entity_type(Ontology.ops().entities().SOURCE)
        or is_entity_type('Collection')
        or is_entity_type('Publication')
        or is_sample_category(categories.BLOCK)
        or is_sample_category(categories.SECTION)
        or is_sample_category(categories.SUSPENSION)
    )
    if not checks_top_level_metadata:
        # None of the checked entity types or sample categories matched
        return property_key, None

    # For these entities the 'metadata' key is looked up on the entity itself
    return property_key, str('metadata' in existing_data_dict)


def get_source_mapped_metadata(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
"""Trigger event method of auto generating mapped metadata from 'living_donor_data' or 'organ_donor_data'.

Expand Down
5 changes: 0 additions & 5 deletions test/data/get_ancestors_success_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
"uuid": "5fa78f4fa272db58a7fcc7590376f5e7",
"source": {
"metadata": {},
"lab_source_id": "Human Source 1",
"group_name": "CODCC Testing Group",
"sennet_id": "SNT522.GDLF.724",
"last_modified_timestamp": 1681844922032,
Expand Down Expand Up @@ -83,7 +82,6 @@
"entity_type": "Sample",
"group_name": "CODCC Testing Group",
"group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
"lab_tissue_sample_id": "Human Blood",
"last_modified_timestamp": 1681828779121,
"last_modified_user_displayname": "Test User",
"last_modified_user_email": "[email protected]",
Expand All @@ -98,7 +96,6 @@
"entity_type": "Sample",
"group_name": "CODCC Testing Group",
"group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
"lab_tissue_sample_id": "Human Blood",
"last_modified_timestamp": 1681828779121,
"last_modified_user_displayname": "Test User",
"last_modified_user_email": "[email protected]",
Expand All @@ -113,7 +110,6 @@
"uuid": "cf3d0408de9afd703c8bd71808176b38",
"source": {
"metadata": {},
"lab_source_id": "Human Source 1",
"group_name": "CODCC Testing Group",
"sennet_id": "SNT522.GDLF.724",
"last_modified_timestamp": 1681844922032,
Expand Down Expand Up @@ -142,7 +138,6 @@
"entity_type": "Source",
"group_name": "CODCC Testing Group",
"group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
"lab_source_id": "Human Source 1",
"last_modified_timestamp": 1681844922032,
"last_modified_user_displayname": "Test User",
"last_modified_user_email": "[email protected]",
Expand Down
2 changes: 0 additions & 2 deletions test/data/get_descendants_success_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
"entity_type": "Dataset",
"group_name": "CODCC Testing Group",
"group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
"lab_dataset_id": "Vitessce example - secondary dataset",
"last_modified_timestamp": 1681841991272,
"last_modified_user_displayname": "Test User",
"last_modified_user_email": "[email protected]",
Expand All @@ -26,7 +25,6 @@
"sources": [
{
"metadata": {},
"lab_source_id": "Human Source 1",
"group_name": "CODCC Testing Group",
"sennet_id": "SNT522.GDLF.724",
"last_modified_timestamp": 1681844922032,
Expand Down
1 change: 0 additions & 1 deletion test/data/get_entity_by_id_success_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
"entity_type": "Dataset",
"group_name": "University of Pittsburgh TMC",
"group_uuid": "28db7a2b-ed8a-11ec-8b0a-9fe9b51132b1",
"lab_dataset_id": "897-Dataset",
"last_modified_timestamp": 1683227917901,
"last_modified_user_displayname": "Test User",
"last_modified_user_email": "[email protected]",
Expand Down
Loading