
Commit

maxsibilla committed Nov 21, 2024
2 parents 8c27370 + 683bc97 commit e36b8bb
Showing 9 changed files with 202 additions and 40 deletions.
20 changes: 10 additions & 10 deletions src/app.py
@@ -1153,7 +1153,7 @@ def check_previous_revision(previous_revision_uuid):
'next_revision_uuids',
'previous_revision_uuids'
]
elif normalized_entity_type in ['Upload', 'Collection']:
elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']:
properties_to_skip = [
'datasets',
'entities'
@@ -1504,7 +1504,7 @@ def update_entity(id: str, user_token: str, json_data_dict: dict):
if has_dataset_uuids_to_link or has_updated_status:
after_update(normalized_entity_type, user_token, merged_updated_dict)

elif normalized_entity_type == 'Collection':
elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'):
entity_visibility = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=entity_dict)

# Prohibit update of an existing Collection if it meets criteria of being visible to public e.g. has DOI.
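The switch from a string-equality check to schema_manager.entity_type_instanceof() is what lets every Collection code path also accept Epicollection, through the `superclass: Collection` link added to provenance_schema.yaml later in this commit. A minimal sketch of the idea, assuming instanceof simply walks the superclass chain (the real schema_manager implementation is not shown in this diff):

# Sketch only: the real logic lives in schema_manager, which this diff does not show.
SUPERCLASS = {'Epicollection': 'Collection'}  # mirrors `superclass: Collection` in provenance_schema.yaml

def entity_type_instanceof(entity_type: str, target_type: str) -> bool:
    # Walk up the superclass chain until the target is found or the chain ends
    current = entity_type
    while current is not None:
        if current == target_type:
            return True
        current = SUPERCLASS.get(current)
    return False

assert entity_type_instanceof('Epicollection', 'Collection')
assert not entity_type_instanceof('Upload', 'Collection')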
@@ -1542,7 +1542,7 @@ def update_entity(id: str, user_token: str, json_data_dict: dict):
'next_revision_uuids',
'previous_revision_uuids'
]
elif normalized_entity_type in ['Upload', 'Collection']:
elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']:
properties_to_skip = [
'datasets',
'entities'
@@ -2441,7 +2441,7 @@ def doi_redirect(id):
entity_type = entity_dict['entity_type']

# Only allow Collection, Epicollection, Dataset and Publication entities
if entity_type not in ['Collection', 'Dataset', 'Publication']:
if entity_type not in ['Collection', 'Epicollection', 'Dataset', 'Publication']:
abort_bad_req("The target entity of the specified id must be a Collection, Epicollection, Dataset or Publication")

uuid = entity_dict['uuid']
@@ -4618,14 +4618,13 @@ def get_collections(id):
# Get the entity dict from cache if exists
# Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
entity_dict = query_target_entity(id)
normalized_entity_type = entity_dict['entity_type']
uuid = entity_dict['uuid']
public_entity = True

if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
abort_bad_req(f"Unsupported entity type of id {id}: {normalized_entity_type}")
entity_scope = _get_entity_visibility(normalized_entity_type=entity_dict['entity_type'],
entity_dict=entity_dict)

if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED:
if entity_scope == DataVisibilityEnum.NONPUBLIC:
# Token is required and the user must belong to HuBMAP-READ group
token = get_user_token(request, non_public_access_required=True)
public_entity = False
@@ -4922,7 +4921,8 @@ def get_entities_for_collection(id: str):
# Verify that the entity is a collection
entity_dict = query_target_entity(id)
entity_type = entity_dict["entity_type"]
if not equals(entity_type, "Collection"):

if not schema_manager.entity_type_instanceof(entity_type, "Collection"):
abort_bad_req(f"{entity_type.title()} with id {id} is not a collection")

# Determine if the entity is publicly visible based on its data only.
@@ -5605,7 +5605,7 @@ def delete_cache(id):
upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid')

# If the target entity is a Dataset/Publication, delete the associated Collections cache and Upload cache
collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid')
collection_uuids = schema_neo4j_queries.get_entity_collections(neo4j_driver_instance, entity_uuid , 'uuid')
collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid)
upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid)

9 changes: 8 additions & 1 deletion src/lib/constraints/__init__.py
@@ -3,6 +3,7 @@
from lib.constraints.sample import *
from lib.constraints.dataset import *
from lib.constraints.publication import *
from lib.constraints.epicollection import *
from deepdiff import DeepDiff

from atlas_consortia_commons.rest import rest_ok, rest_response, StatusCodes, rest_bad_req
@@ -21,18 +22,24 @@ def build_sample_constraints(entity) -> list:
def build_dataset_constraints(entity) -> list:
return build_all_dataset_constraints(entity)


def build_publication_constraints(entity) -> list:
return build_all_publication_constraints(entity)


def build_epicollection_constraints(entity) -> list:
return build_all_epicollection_constraints(entity)


def determine_constraint_from_entity(constraint_unit, use_case=None) -> dict:
entity_type = constraint_unit.get('entity_type', '')
entity_type = entity_type.lower()
sub_type = constraint_unit.get('sub_type')
error = None
constraints = []
entities = Ontology.ops(as_arr=True, cb=enum_val_lower).entities()

# Need to manually add Epicollection
entities.append('epicollection')
if entity_type not in entities:
error = f"No `entity_type` found with value `{entity_type}`"
else:
21 changes: 21 additions & 0 deletions src/lib/constraints/epicollection.py
@@ -0,0 +1,21 @@
from lib.constraints.base import build_constraint, build_constraint_unit, build_search_constraint_unit
from lib.ontology import Ontology


# can be the descendant of / --->
def build_all_epicollection_constraints(entity):

ancestor = build_constraint_unit(Ontology.ops().entities().DATASET)
descendant = build_constraint_unit(entity)

return [
build_constraint(ancestor, [descendant])
]


def build_epicollection_search_constraints(entity):
descendant = build_constraint_unit(entity)
ancestor = build_search_constraint_unit('entity_type.keyword', Ontology.ops().entities().DATASET)

return [
build_constraint([ancestor], [descendant])
]
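These builders mirror the per-type constraint modules for Sample, Dataset, and Publication: an Epicollection may only appear downstream of Datasets. A minimal usage sketch, assuming the constraint-unit dicts produced by build_constraint_unit carry the entity type (their exact shape comes from lib/constraints/base.py, which this diff does not show):

# Usage sketch; the dict shape below is an assumption, not asserted output.
from lib.constraints.epicollection import build_all_epicollection_constraints

entity = {'entity_type': 'Epicollection'}  # hypothetical constraint unit for an Epicollection
constraints = build_all_epicollection_constraints(entity)

# Expected: a single constraint pairing a Dataset ancestor with the Epicollection
# descendant, i.e. Epicollections hang off Datasets in the provenance graph.
print(constraints)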
31 changes: 28 additions & 3 deletions src/schema/provenance_schema.yaml
@@ -262,7 +262,7 @@ ENTITIES:
source: false
target: false
# Collection doesn't actually need data_access_level property
properties:
properties: &shared_collection_properties
<<: *shared_properties
<<: *shared_entity_properties
# Because Collection-specific validation is needed for some
@@ -444,8 +444,8 @@ ENTITIES:
transient: true
generated: true
description: "A list of collections that this dataset belongs to. Will be returned in response"
on_read_trigger: get_dataset_collections
on_index_trigger: get_dataset_collections
on_read_trigger: get_entity_collections
on_index_trigger: get_entity_collections
# No on_index_trigger to include collections in the OpenSearch document for a Dataset (from HM, we might need this for SN)
upload:
type: json_string # dict
@@ -888,6 +888,13 @@ ENTITIES:
type: string
required_on_create: true
description: "A high level description of where this source originates from."
collections:
type: list
transient: true
generated: true
description: "A list of collections that this source belongs to. Will be returned in response"
on_read_trigger: get_entity_collections
on_index_trigger: get_entity_collections


############################################# Sample #############################################
@@ -1149,6 +1156,13 @@ ENTITIES:
immutable: true
description: "A boolean indicating if the Sample contains any Datasets in its descendants."
on_index_trigger: get_contains_data
collections:
type: list
transient: true
generated: true
description: "A list of collections that this sample belongs to. Will be returned in response"
on_read_trigger: get_entity_collections
on_index_trigger: get_entity_collections

############################################# Upload #############################################
# Eventually we might allow folks to bulk register Samples or Sources through the same mechanism.
@@ -1264,3 +1278,14 @@ ENTITIES:
type: boolean
description: 'Determines if the datasets of an upload are all published.'
on_index_trigger: get_has_all_published_datasets

############################################# EPICollection #############################################
Epicollection:
# This superclass property is optional
superclass: Collection
# Epicollection can not be a derivation source or target
derivation:
source: false
target: false
properties:
<<: *shared_collection_properties
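The &shared_collection_properties anchor added above is what makes this block work: the <<: merge key copies Collection's entire property map into Epicollection. A standalone sketch of the merge-key mechanics with PyYAML (the YAML here is illustrative, not the real schema):

# Demonstrates the YAML anchor/merge-key semantics the schema change relies on.
import yaml

doc = '''
Collection:
  properties: &shared_collection_properties
    created_timestamp:
      type: integer
    title:
      type: string
Epicollection:
  properties:
    <<: *shared_collection_properties
'''

schema = yaml.safe_load(doc)
# The merge key expands to every property defined under the anchor
assert schema['Epicollection']['properties'] == schema['Collection']['properties']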
2 changes: 1 addition & 1 deletion src/schema/schema_constants.py
@@ -23,7 +23,7 @@ class SchemaConstants(object):

DOI_BASE_URL = 'https://doi.org/'

ALLOWED_SINGLE_CREATION_ACTIONS = ['central process', 'lab process']
ALLOWED_SINGLE_CREATION_ACTIONS = ['central process', 'lab process', 'external process']
ALLOWED_MULTI_CREATION_ACTIONS = ['multi-assay split']

ALLOWED_DATASET_STATUSES = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete']
57 changes: 46 additions & 11 deletions src/schema/schema_neo4j_queries.py
@@ -107,6 +107,39 @@ def get_dataset_direct_descendants(neo4j_driver, uuid, property_key=None, match_

return results


"""
Get the uuids of every entity in a list that does not belong to a given entity type. The uuids are grouped by entity type
Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
direct_ancestor_uuids : list
List of the uuids to be filtered
entity_type : string
The entity to be excluded
Returns
-------
list
A list of records, each holding an entity_type and the collected uuids of that type, for every entity whose type differs from the excluded one; None if no entities remain
"""


def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type):
query = (f"MATCH (e:Entity) "
f"WHERE e.uuid in {direct_ancestor_uuids} AND toLower(e.entity_type) <> '{entity_type.lower()}' "
f"RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids")
logger.info("======filter_ancestors_by_type======")
logger.info(query)

with neo4j_driver.session() as session:
records = session.run(query).data()

return records if records else None
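Worth noting as a sketch: filter_ancestors_by_type() interpolates its inputs straight into the Cypher string. The official neo4j Python driver also accepts query parameters in session.run(), so a parameterized equivalent would look like the following (an alternative formulation, not what this commit ships):

# Parameterized sketch of the same query; not part of this commit.
def filter_ancestors_by_type_parameterized(neo4j_driver, direct_ancestor_uuids, entity_type):
    query = ("MATCH (e:Entity) "
             "WHERE e.uuid IN $uuids AND toLower(e.entity_type) <> toLower($entity_type) "
             "RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids")

    with neo4j_driver.session() as session:
        # The driver binds $uuids and $entity_type server-side, avoiding quoting issues
        records = session.run(query, uuids=direct_ancestor_uuids, entity_type=entity_type).data()

    return records if records else None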


"""
Get the origin (organ) sample ancestor of a given entity by uuid
@@ -662,14 +695,14 @@ def get_next_revision_uuid(neo4j_driver, uuid):


"""
Get a list of associated collection uuids for a given dataset
Get a list of associated collection uuids for a given entity
Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuid : str
The uuid of dataset
The uuid of entity
property_key : str
A target property key for result filtering
@@ -680,7 +713,7 @@ def get_next_revision_uuid(neo4j_driver, uuid):
"""


def get_dataset_collections(neo4j_driver, uuid, property_key=None):
def get_entity_collections(neo4j_driver, uuid, property_key=None):
results = []

if property_key:
@@ -692,7 +725,7 @@ def get_next_revision_uuid(neo4j_driver, uuid):
f"WHERE e.uuid = '{uuid}' "
f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}")

logger.info("======get_dataset_collections() query======")
logger.info("======get_entity_collections() query======")
logger.info(query)

with neo4j_driver.session() as session:
@@ -768,7 +801,7 @@ def get_dataset_upload(neo4j_driver, uuid, property_key=None):
def get_collection_entities(neo4j_driver, uuid):
results = []

query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) "
query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection|Epicollection) "
f"WHERE c.uuid = '{uuid}' "
f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")

@@ -1370,7 +1403,7 @@ def _delete_publication_associated_collection_linkages_tx(tx, uuid):


def _delete_collection_linkages_tx(tx, uuid):
query = (f"MATCH (d:Dataset)-[in:IN_COLLECTION]->(c:Collection)"
query = (f"MATCH (e:Entity)-[in:IN_COLLECTION]->(c:Collection)"
f" WHERE c.uuid = '{uuid}' "
f" DELETE in")

Expand Down Expand Up @@ -1996,7 +2029,8 @@ def get_tuplets(neo4j_driver, uuid, property_key=None):


"""
Get all collections by uuid
Get all collections for a given entity uuid
Parameters
----------
neo4j_driver : neo4j.Driver object
Expand All @@ -2005,6 +2039,7 @@ def get_tuplets(neo4j_driver, uuid, property_key=None):
The uuid of target entity
property_key : str
A target property key for result filtering
Returns
-------
list
@@ -2014,14 +2049,14 @@
results = []

if property_key:
query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(ds:Dataset) "
f"WHERE ds.uuid='{uuid}' "
query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(e:Entity) "
f"WHERE e.uuid='{uuid}' "
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(c.{property_key})) AS {record_field_name}")
else:
query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(ds:Dataset) "
f"WHERE ds.uuid='{uuid}' "
query = (f"MATCH (c:Collection)<-[:IN_COLLECTION]-(e:Entity) "
f"WHERE e.uuid='{uuid}' "
# COLLECT() returns a list
# apoc.coll.toSet() returns a set containing unique nodes
f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}")
13 changes: 9 additions & 4 deletions src/schema/schema_triggers.py
@@ -799,7 +799,7 @@ def set_dataset_status_new(property_key, normalized_type, user_token, existing_d
return property_key, 'New'


def get_dataset_collections(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
def get_entity_collections(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
"""Trigger event method of getting a list of collections for this new Dataset.
Parameters
@@ -825,15 +825,15 @@ def get_dataset_collections(property_key, normalized_type, user_token, existing_

if 'uuid' not in existing_data_dict:
msg = create_trigger_error_msg(
"Missing 'uuid' key in 'existing_data_dict' during calling 'get_dataset_collections()' trigger method.",
"Missing 'uuid' key in 'existing_data_dict' during calling 'get_entity_collections()' trigger method.",
existing_data_dict, new_data_dict
)
raise KeyError(msg)

# No property key needs to filter the result
# Get back the list of collection dicts
collections_list = schema_neo4j_queries.get_dataset_collections(schema_manager.get_neo4j_driver_instance(),
existing_data_dict['uuid'])
collections_list = schema_neo4j_queries.get_entity_collections(schema_manager.get_neo4j_driver_instance(),
existing_data_dict['uuid'])
if collections_list:
# Exclude datasets from each resulting collection
# We don't want to show too much nested information
Expand Down Expand Up @@ -927,6 +927,11 @@ def link_collection_to_entities(property_key, normalized_type, user_token, exist
schema_neo4j_queries.link_collection_to_entities(neo4j_driver=schema_manager.get_neo4j_driver_instance(),
collection_uuid=existing_data_dict['uuid'],
entities_uuid_list=entity_uuids)

# Delete the cache of each associated dataset and the collection itself if any cache exists,
# because the `Dataset.collections` field and the `Collection.datasets` field are now stale
uuids_list = [existing_data_dict['uuid']] + entity_uuids
schema_manager.delete_memcached_cache(uuids_list)
except TransactionError:
# No need to log
raise
