Tjmadonna/523 epicollections #526

Merged
merged 15 commits on Nov 21, 2024
11 changes: 6 additions & 5 deletions src/app.py
@@ -1153,7 +1153,7 @@ def check_previous_revision(previous_revision_uuid):
'next_revision_uuids',
'previous_revision_uuids'
]
elif normalized_entity_type in ['Upload', 'Collection']:
elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']:
properties_to_skip = [
'datasets',
'entities'
@@ -1504,7 +1504,7 @@ def update_entity(id: str, user_token: str, json_data_dict: dict):
if has_dataset_uuids_to_link or has_updated_status:
after_update(normalized_entity_type, user_token, merged_updated_dict)

elif normalized_entity_type == 'Collection':
elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'):
entity_visibility = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=entity_dict)

# Prohibit update of an existing Collection if it meets criteria of being visible to public e.g. has DOI.
@@ -1542,7 +1542,7 @@ def update_entity(id: str, user_token: str, json_data_dict: dict):
'next_revision_uuids',
'previous_revision_uuids'
]
elif normalized_entity_type in ['Upload', 'Collection']:
elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']:
properties_to_skip = [
'datasets',
'entities'
@@ -2441,7 +2441,7 @@ def doi_redirect(id):
entity_type = entity_dict['entity_type']

# Only for collection
if entity_type not in ['Collection', 'Dataset', 'Publication']:
if entity_type not in ['Collection', 'Epicollection', 'Dataset', 'Publication']:
abort_bad_req("The target entity of the specified id must be a Collection or Dataset or Publication")

uuid = entity_dict['uuid']
@@ -4922,7 +4922,8 @@ def get_entities_for_collection(id: str):
# Verify that the entity is a collection
entity_dict = query_target_entity(id)
entity_type = entity_dict["entity_type"]
if not equals(entity_type, "Collection"):

if not schema_manager.entity_type_instanceof(entity_type, "Collection"):
abort_bad_req(f"{entity_type.title()} with id {id} is not a collection")

# Determine if the entity is publicly visible base on its data, only.
9 changes: 8 additions & 1 deletion src/lib/constraints/__init__.py
@@ -3,6 +3,7 @@
from lib.constraints.sample import *
from lib.constraints.dataset import *
from lib.constraints.publication import *
from lib.constraints.epicollection import *
from deepdiff import DeepDiff

from atlas_consortia_commons.rest import rest_ok, rest_response, StatusCodes, rest_bad_req
@@ -21,18 +22,24 @@ def build_sample_constraints(entity) -> list:
def build_dataset_constraints(entity) -> list:
return build_all_dataset_constraints(entity)


def build_publication_constraints(entity) -> list:
return build_all_publication_constraints(entity)


def build_epicollection_constraints(entity) -> list:
return build_all_epicollection_constraints(entity)


def determine_constraint_from_entity(constraint_unit, use_case=None) -> dict:
entity_type = constraint_unit.get('entity_type', '')
entity_type = entity_type.lower()
sub_type = constraint_unit.get('sub_type')
error = None
constraints = []
entities = Ontology.ops(as_arr=True, cb=enum_val_lower).entities()

# Need to manually add Epicollection
entities.append('epicollection')
if entity_type not in entities:
error = f"No `entity_type` found with value `{entity_type}`"
else:
21 changes: 21 additions & 0 deletions src/lib/constraints/epicollection.py
@@ -0,0 +1,21 @@
from lib.constraints.base import build_constraint, build_constraint_unit, build_search_constraint_unit
from lib.ontology import Ontology


# can be the descendant of / --->
def build_all_epicollection_constraints(entity):

ancestor = build_constraint_unit(Ontology.ops().entities().DATASET)
descendant = build_constraint_unit(entity)

return [
build_constraint(ancestor, [descendant])
]

def build_epicollection_search_constraints(entity):
descendant = build_constraint_unit(entity)
ancestor = build_search_constraint_unit('entity_type.keyword', Ontology.ops().entities().DATASET)

return [
build_constraint([ancestor], [descendant])
]
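
A rough usage sketch, not part of this PR: the builders above encode that an Epicollection may only descend from Datasets. The string argument below is an assumption for illustration; in the app the value flows in from determine_constraint_from_entity, and the Ontology singleton must already have been initialized by the application.

from lib.constraints.epicollection import build_all_epicollection_constraints

# Sketch only: returns a single constraint pairing a Dataset ancestor
# unit with the Epicollection descendant unit
constraints = build_all_epicollection_constraints('Epicollection')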
13 changes: 12 additions & 1 deletion src/schema/provenance_schema.yaml
@@ -262,7 +262,7 @@ ENTITIES:
source: false
target: false
# Collection doesn't actually need data_access_level property
properties:
properties: &shared_collection_properties
<<: *shared_properties
<<: *shared_entity_properties
# Because Collection-specific validation is needed for some
@@ -1264,3 +1264,14 @@ ENTITIES:
type: boolean
description: 'Determines if the datasets of an upload are all published.'
on_index_trigger: get_has_all_published_datasets

############################################# EPICollection #############################################
Epicollection:
# This superclass property is optional
superclass: Collection
# Epicollection can not be a derivation source or target
derivation:
source: false
target: false
properties:
<<: *shared_collection_properties
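
The `&shared_collection_properties` anchor added above and the `<<:` merge key here are what let Epicollection inherit every Collection property. A minimal standalone sketch of the mechanics (illustrative YAML, not from this schema; PyYAML resolves YAML 1.1 merge keys on load):

import yaml  # PyYAML

doc = """
Collection:
  properties: &shared_collection_properties
    uuid: {type: string}
    title: {type: string}
Epicollection:
  properties:
    <<: *shared_collection_properties
"""

data = yaml.safe_load(doc)
# The merge key copies every anchored Collection property into Epicollection
assert data['Epicollection']['properties'] == data['Collection']['properties']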
2 changes: 1 addition & 1 deletion src/schema/schema_constants.py
@@ -23,7 +23,7 @@ class SchemaConstants(object):

DOI_BASE_URL = 'https://doi.org/'

ALLOWED_SINGLE_CREATION_ACTIONS = ['central process', 'lab process']
ALLOWED_SINGLE_CREATION_ACTIONS = ['central process', 'lab process', 'external process']
ALLOWED_MULTI_CREATION_ACTIONS = ['multi-assay split']

ALLOWED_DATASET_STATUSES = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete']
37 changes: 35 additions & 2 deletions src/schema/schema_neo4j_queries.py
@@ -107,6 +107,39 @@ def get_dataset_direct_descendants(neo4j_driver, uuid, property_key=None, match_

return results


"""
Get the uuids of each entity in a list that does not belong to a given entity type. The uuids are grouped by entity type

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
direct_ancestor_uuids : list
List of the uuids to be filtered
entity_type : string
The entity to be excluded

Returns
-------
list
A list of records, one per entity type, each with the entity_type and the collected uuids of entities that are not of the excluded type; None if there are no such entities
"""


def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type):
query = (f"MATCH (e:Entity) "
f"WHERE e.uuid in {direct_ancestor_uuids} AND toLower(e.entity_type) <> '{entity_type.lower()}' "
f"RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids")
logger.info("======filter_ancestors_by_type======")
logger.info(query)

with neo4j_driver.session() as session:
records = session.run(query).data()

return records if records else None
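
The f-string interpolation above works because a Python list literal is also valid Cypher syntax, but a parameterized query avoids quoting and injection pitfalls. A minimal alternative sketch, assuming the same neo4j.Driver object (not part of this PR):

def filter_ancestors_by_type_parameterized(neo4j_driver, direct_ancestor_uuids, entity_type):
    # $uuids and $entity_type are bound by the driver rather than being
    # formatted into the query text
    query = ("MATCH (e:Entity) "
             "WHERE e.uuid IN $uuids AND toLower(e.entity_type) <> $entity_type "
             "RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids")

    with neo4j_driver.session() as session:
        records = session.run(query, uuids=direct_ancestor_uuids,
                              entity_type=entity_type.lower()).data()

    return records if records else None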


"""
Get the origin (organ) sample ancestor of a given entity by uuid

@@ -768,7 +801,7 @@ def get_dataset_upload(neo4j_driver, uuid, property_key=None):
def get_collection_entities(neo4j_driver, uuid):
results = []

query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection) "
query = (f"MATCH (e:Entity)-[:IN_COLLECTION]->(c:Collection|Epicollection) "
f"WHERE c.uuid = '{uuid}' "
f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")

Expand Down Expand Up @@ -1370,7 +1403,7 @@ def _delete_publication_associated_collection_linkages_tx(tx, uuid):


def _delete_collection_linkages_tx(tx, uuid):
query = (f"MATCH (d:Dataset)-[in:IN_COLLECTION]->(c:Collection)"
query = (f"MATCH (e:Entity)-[in:IN_COLLECTION]->(c:Collection)"
f" WHERE c.uuid = '{uuid}' "
f" DELETE in")

5 changes: 5 additions & 0 deletions src/schema/schema_triggers.py
@@ -927,6 +927,11 @@ def link_collection_to_entities(property_key, normalized_type, user_token, exist
schema_neo4j_queries.link_collection_to_entities(neo4j_driver=schema_manager.get_neo4j_driver_instance(),
collection_uuid=existing_data_dict['uuid'],
entities_uuid_list=entity_uuids)

# Delete the cache of each associated dataset and the collection itself if any cache exists,
# because the `Dataset.collections` field and the `Collection.datasets` field are affected
uuids_list = [existing_data_dict['uuid']] + entity_uuids
schema_manager.delete_memcached_cache(uuids_list)
except TransactionError:
# No need to log
raise
81 changes: 77 additions & 4 deletions src/schema/schema_validators.py
@@ -63,6 +63,71 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request
raise ValueError(f"The {property_key} field must only contain unique items")


"""
Validate every entity exists and (optionally) is a Dataset

Parameters
----------
property_key : str
The target property key
normalized_entity_type : str
The normalized entity type of the entity being validated
request: Flask request object
The instance of Flask request passed in from application request
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
The json data in request body, already after the regular validations
"""
def collection_entities_are_existing_entities(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
# `entity_uuids` is required for creating a Collection
# Verify each UUID specified exists in the uuid-api, exists in Neo4j, and (optionally) is for a Dataset before
# proceeding with creation of Collection.
bad_entities_uuids = []
for entity_uuid in new_data_dict['entity_uuids']:
try:
# The following code duplicates some functionality existing in app.py, in
# query_target_entity(), which also deals with caching. In the future, the
# validation logic shared by this file and app.py should become a utility
# module, shared by validators as well as app.py. But for now, the code
# is repeated for the following.

# Get cached ids if exist otherwise retrieve from UUID-API. Expect an
# Exception to be raised if not found.
entity_detail = schema_manager.get_sennet_ids(id=entity_uuid)
entity_uuid = entity_detail['uuid']

# If the uuid exists per the uuid-api, make sure it also exists as a Neo4j entity.
entity_dict = schema_neo4j_queries.get_entity(schema_manager.get_neo4j_driver_instance(), entity_uuid)

# If the entity_uuid is not found in Neo4j, fail the validation.
if not entity_dict:
logger.info(f"Request for {entity_uuid} inclusion in Collection, "
"but not found in Neo4j.")
bad_entities_uuids.append(entity_uuid)
continue

# Collections can have other entity types besides Dataset, so skip the Dataset check
if normalized_entity_type == 'Collection':
continue

if entity_dict['entity_type'] != 'Dataset':
logger.info(f"Request for {entity_uuid} inclusion in Collection, "
f"but entity_type={entity_dict['entity_type']}, not Dataset.")
bad_entities_uuids.append(entity_uuid)
except Exception:
# If the entity_uuid is not found, fail the validation.
logger.info(f"Request for {entity_uuid} inclusion in Collection "
"failed uuid-api retrieval.")
bad_entities_uuids.append(entity_uuid)

# If any uuids in the request entities_uuids are not for an existing Dataset entity which
# exists in uuid-api and Neo4j, raise an Exception so the validation fails and the
# operation can be rejected.
if bad_entities_uuids:
raise ValueError(f"Unable to find Datasets for {bad_entities_uuids}.")


"""
If an entity has a DOI, do not allow it to be updated
"""
@@ -490,13 +555,21 @@ def validate_publication_date(property_key, normalized_entity_type, request, exi


def validate_creation_action(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
accepted_creation_action_values = SchemaConstants.ALLOWED_SINGLE_CREATION_ACTIONS
creation_action = new_data_dict.get(property_key)
if creation_action and creation_action.lower() not in accepted_creation_action_values:
raise ValueError("Invalid {} value. Accepted values are: {}".format(property_key, ", ".join(accepted_creation_action_values)))
creation_action = new_data_dict[property_key].lower()  # raises KeyError if not found
if creation_action == '':
raise ValueError(f"The property {property_key} cannot be empty, when specified.")

accepted_creation_action_values = SchemaConstants.ALLOWED_SINGLE_CREATION_ACTIONS
if creation_action not in accepted_creation_action_values:
raise ValueError("Invalid {} value. Accepted values are: {}".format(property_key, ", ".join(accepted_creation_action_values)))

if creation_action == 'external process':
direct_ancestor_uuids = new_data_dict.get('direct_ancestor_uuids')
entity_types_dict = schema_neo4j_queries.filter_ancestors_by_type(schema_manager.get_neo4j_driver_instance(), direct_ancestor_uuids, "dataset")
if entity_types_dict:
raise ValueError("If 'creation_action' field is given and is 'external process', all ancestor uuids must belong to datasets. "
f"The following entities belong to non-dataset entities: {entity_types_dict}")


"""
Validate the provided value of the activity creation action before updating direct ancestors. Certain values prohibited
8 changes: 2 additions & 6 deletions test/test_schema_validators.py
@@ -1,7 +1,3 @@
import test

test.cwd_to_src()

import pytest

from schema import schema_validators
@@ -12,7 +8,7 @@
('central process', True),
('Lab Process', True),
('lab process', True),
(None, True),
(None, False),
('Multi-Assay Split', False),
('multi-assay split', False),
('', False),
@@ -38,7 +34,7 @@ def test_validate_single_creation_action(creation_action, succeeds):
)
else:
# Test invalid creation action
with pytest.raises(ValueError):
with pytest.raises((ValueError, KeyError)):
schema_validators.validate_creation_action(
property_key, normalized_entity_type, request,
existing_data_dict, new_data_dict