Checking entities visibility for collections #529

Merged · 4 commits · Nov 21, 2024
9 changes: 4 additions & 5 deletions src/app.py
@@ -4618,14 +4618,13 @@ def get_collections(id):
# Get the entity dict from cache if exists
# Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
entity_dict = query_target_entity(id)
- normalized_entity_type = entity_dict['entity_type']
uuid = entity_dict['uuid']
public_entity = True

- if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
-     abort_bad_req(f"Unsupported entity type of id {id}: {normalized_entity_type}")
+ entity_scope = _get_entity_visibility(normalized_entity_type=entity_dict['entity_type'],
+                                       entity_dict=entity_dict)

- if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
+ if entity_scope == DataVisibilityEnum.NONPUBLIC:
# Token is required and the user must belong to HuBMAP-READ group
token = get_user_token(request, non_public_access_required=True)
public_entity = False
@@ -5605,7 +5604,7 @@ def delete_cache(id):
upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid')

# If the target entity is Datasets/Publication, delete the associated Collections cache, Upload cache
- collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid')
+ collection_uuids = schema_neo4j_queries.get_entity_collections(neo4j_driver_instance, entity_uuid , 'uuid')
collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid)
upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid)

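The change above swaps the hard-coded "Dataset + published" check for a generic visibility lookup, so the collections endpoint can serve any supported entity type instead of aborting on non-Datasets. `_get_entity_visibility` and `DataVisibilityEnum` appear in the diff, but their definitions do not; a minimal sketch of the pattern, under assumed rules, might look like this:

from enum import Enum

class DataVisibilityEnum(Enum):
    PUBLIC = 'public'
    NONPUBLIC = 'nonpublic'

def _get_entity_visibility(normalized_entity_type, entity_dict):
    # Assumed rules for illustration only: Datasets/Publications are public
    # once their status is 'published'; every other case is non-public.
    # The real helper in app.py may apply additional criteria.
    if normalized_entity_type in ('Dataset', 'Publication'):
        if entity_dict.get('status', '').lower() == 'published':
            return DataVisibilityEnum.PUBLIC
    return DataVisibilityEnum.NONPUBLIC

Non-public entities then fall into the branch that requires a HuBMAP-READ group token, just as the published-status check did before.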
18 changes: 16 additions & 2 deletions src/schema/provenance_schema.yaml
@@ -444,8 +444,8 @@ ENTITIES:
transient: true
generated: true
description: "A list of collections that this dataset belongs to. Will be returned in response"
- on_read_trigger: get_dataset_collections
- on_index_trigger: get_dataset_collections
+ on_read_trigger: get_entity_collections
+ on_index_trigger: get_entity_collections
# No on_index_trigger to include collections in the OpenSearch document for a Dataset (from HM, we might need this for SN)
upload:
type: json_string # dict
@@ -888,6 +888,13 @@ ENTITIES:
type: string
required_on_create: true
description: "A high level description of where this source originates from."
+ collections:
+   type: list
+   transient: true
+   generated: true
+   description: "A list of collections that this source belongs to. Will be returned in response"
+   on_read_trigger: get_entity_collections
+   on_index_trigger: get_entity_collections


############################################# Sample #############################################
@@ -1149,6 +1156,13 @@ ENTITIES:
immutable: true
description: "A boolean indicating if the Sample contains any Datasets in its descendants."
on_index_trigger: get_contains_data
+ collections:
+   type: list
+   transient: true
+   generated: true
+   description: "A list of collections that this sample belongs to. Will be returned in response"
+   on_read_trigger: get_entity_collections
+   on_index_trigger: get_entity_collections

############################################# Upload #############################################
# Eventually we might allow folks to bulk register Samples or Sources through the same mechanism.
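The new `collections` blocks for Source and Sample mirror the existing Dataset property: a transient, generated field filled in by the named trigger at read/index time. How the schema manager resolves an `on_read_trigger` name is not part of this diff; the sketch below is an assumption of the dispatch pattern (the helper name `run_on_read_triggers` and the import path are illustrative), using the trigger signature visible in schema_triggers.py further down.

import schema_triggers  # assumed import path; adjust to the project layout

def run_on_read_triggers(entity_dict, properties_spec, user_token):
    # For each schema property that declares an on_read_trigger, look up the
    # named function in schema_triggers and attach its result to the entity.
    for prop, spec in properties_spec.items():
        trigger_name = spec.get('on_read_trigger')
        if not trigger_name:
            continue
        trigger_func = getattr(schema_triggers, trigger_name)
        # Triggers take (property_key, normalized_type, user_token,
        # existing_data_dict, new_data_dict) and return a (key, value) pair
        key, value = trigger_func(prop, entity_dict['entity_type'],
                                  user_token, entity_dict, {})
        entity_dict[key] = value
    return entity_dict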
20 changes: 11 additions & 9 deletions src/schema/schema_neo4j_queries.py
@@ -662,14 +662,14 @@ def get_next_revision_uuid(neo4j_driver, uuid):


"""
- Get a list of associated collection uuids for a given dataset
+ Get a list of associated collection uuids for a given entity

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuid : str
- The uuid of dataset
+ The uuid of entity
property_key : str
A target property key for result filtering

@@ -680,7 +680,7 @@ def get_next_revision_uuid(neo4j_driver, uuid):
"""


- def get_dataset_collections(neo4j_driver, uuid, property_key=None):
+ def get_entity_collections(neo4j_driver, uuid, property_key=None):
results = []

if property_key:
@@ -692,7 +692,7 @@ def get_next_revision_uuid(neo4j_driver, uuid):
f"WHERE e.uuid = '{uuid}' "
f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}")

- logger.info("======get_dataset_collections() query======")
+ logger.info("======get_entity_collections() query======")
logger.info(query)

with neo4j_driver.session() as session:
@@ -1996,7 +1996,8 @@ def get_tuplets(neo4j_driver, uuid, property_key=None):


"""
- Get all collections by uuid
+ Get all collections for a given entity uuid
+
Parameters
----------
neo4j_driver : neo4j.Driver object
@@ -2005,6 +2006,7 @@ def get_tuplets(neo4j_driver, uuid, property_key=None):
The uuid of target entity
property_key : str
A target property key for result filtering
+
Returns
-------
list
@@ -2014,14 +2016,14 @@ def get_collections(neo4j_driver, uuid, property_key = None):
results = []

if property_key:
- query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(ds:Dataset) "
-          f"WHERE ds.uuid='{uuid}' "
+ query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(e:Entity) "
+          f"WHERE e.uuid='{uuid}' "
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(c.{property_key})) AS {record_field_name}")
else:
- query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(ds:Dataset) "
-          f"WHERE ds.uuid='{uuid}' "
+ query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(e:Entity) "
+          f"WHERE e.uuid='{uuid}' "
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}")
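Both reworked queries keep the module's style of interpolating `uuid` into the Cypher string. For reference, a parameterized variant of the same MATCH is sketched below; the helper name and the node-to-dict conversion are assumptions for illustration, not code from this PR.

def get_entity_collections_parameterized(neo4j_driver, uuid, property_key=None):
    # Same query as get_entity_collections(), but with uuid passed as a bound
    # parameter instead of being interpolated into the query string.
    if property_key:
        query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(e:Entity) "
                 f"WHERE e.uuid = $uuid "
                 f"RETURN apoc.coll.toSet(COLLECT(c.{property_key})) AS result")
    else:
        query = ("MATCH (c:Collection)<-[:IN_COLLECTION]-(e:Entity) "
                 "WHERE e.uuid = $uuid "
                 "RETURN apoc.coll.toSet(COLLECT(c)) AS result")

    with neo4j_driver.session() as session:
        record = session.run(query, uuid=uuid).single()
        if not record or not record['result']:
            return []
        if property_key:
            return record['result']
        # Convert neo4j Node objects into plain dicts
        return [dict(node) for node in record['result']]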
8 changes: 4 additions & 4 deletions src/schema/schema_triggers.py
@@ -799,7 +799,7 @@ def set_dataset_status_new(property_key, normalized_type, user_token, existing_d
return property_key, 'New'


- def get_dataset_collections(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
+ def get_entity_collections(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
"""Trigger event method of getting a list of collections for this new Dataset.

Parameters
@@ -825,15 +825,15 @@ def get_dataset_collections(property_key, normalized_type, user_token, existing_

if 'uuid' not in existing_data_dict:
msg = create_trigger_error_msg(
"Missing 'uuid' key in 'existing_data_dict' during calling 'get_dataset_collections()' trigger method.",
"Missing 'uuid' key in 'existing_data_dict' during calling 'get_entity_collections()' trigger method.",
existing_data_dict, new_data_dict
)
raise KeyError(msg)

# No property key needs to filter the result
# Get back the list of collection dicts
- collections_list = schema_neo4j_queries.get_dataset_collections(schema_manager.get_neo4j_driver_instance(),
-                                                                  existing_data_dict['uuid'])
+ collections_list = schema_neo4j_queries.get_entity_collections(schema_manager.get_neo4j_driver_instance(),
+                                                                existing_data_dict['uuid'])
if collections_list:
# Exclude datasets from each resulting collection
# We don't want to show too much nested information
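As the comments above note, the trigger strips the nested dataset lists from each collection before returning it. The trimming code itself is cut off in this hunk; a minimal sketch of the idea follows (the helper name is illustrative, and 'datasets' is the only key known to be excluded):

def _trim_collections(property_key, collections_list):
    # Drop nested dataset lists from each collection dict so the trigger's
    # response stays shallow; other excluded keys, if any, are not shown here.
    properties_to_skip = ['datasets']
    trimmed = [
        {k: v for k, v in collection.items() if k not in properties_to_skip}
        for collection in collections_list
    ]
    return property_key, trimmed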