Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update provenance_schema for Publication - #476 #484

Merged
merged 7 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 21 additions & 81 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ ENTITIES:
derivation:
source: true
target: true
properties:
properties: &shared_dataset_properties
<<: *shared_properties
<<: *shared_entity_properties
<<: *doi_properties
Expand Down Expand Up @@ -597,10 +597,11 @@ ENTITIES:
type: string
description: 'The timestamp of when this entity was last modified or published.'
on_index_trigger: get_last_touch
origin_sample:
origin_samples:
type: json_string
description: 'The Sample ancestor that has the sample_category of "Organ".'
on_index_trigger: get_origin_sample
description: 'The Sample ancestors that have the sample_category of "Organ".'
on_read_trigger: get_origin_samples
on_index_trigger: get_origin_samples
assigned_to_group_name:
type: string
description: The group who is responsible for the next step in the ingest process
Expand Down Expand Up @@ -669,87 +670,25 @@ ENTITIES:
source: true
target: true
properties:
error_message:
type: string
description: "An open text field that holds the last error message that arose from pipeline validation or analysis."
dataset_type:
before_create_trigger: set_publication_dataset_type
type: string
generated: true
immutable: true
description: "The assay types of this Dataset. Valid values come from UBKG and are queried by schema_manager.get_valueset_dataset_type() using the Ontology API."
<<: *shared_dataset_properties
title:
type: string
description: "The title of the publication."
required_on_create: true # Only required for create via POST, not update via PUT
pipeline_message:
#todo: where is this attribute sourced from? Is it stored in the database? <- Not in neo4j
type: string
ingest_metadata:
type: json_string # dict
description: "The metadata returned from the processing at data submission time."
run_id:
type: string
ingest_id:
type: string
# A user who is a member of multiple groups HAS to send in the group_uuid
group_uuid:
type: string
immutable: true
description: "The uuid of globus group which the user who created this entity is a member of. This is required on Create/POST if the user creating the Donor is a member of more than one write group. This property cannot be set via PUT (only on Create/POST)."
before_create_trigger: set_group_uuid #method that, if group_uuid is not already set looks for membership in a single "data provider" group and sets to that. Otherwise if not set and no single "provider group" membership throws error
# No image and metadata file handling like there is for Donor/Sample
# Dataset has only one thumbnail file
thumbnail_file:
generated: true
type: json_string
description: "The dataset thumbnail file detail. Stored in db as a stringified json, e.g., {'filename': 'thumbnail.jpg', 'file_uuid': 'dadasdasdadda'}"
# The updated_peripherally tag is a temporary measure to correctly handle any attributes
# which are potentially updated by multiple triggers
updated_peripherally: true
thumbnail_file_to_add:
type: json_string
transient: true
exposed: false
description: 'Just a temporary file id. Provide as a json object with an temp_file_id like {"temp_file_id":"dzevgd6xjs4d5grmcp4n"}'
before_create_trigger: commit_thumbnail_file
# This before_update_trigger with the same commit process can be used by ingest-api to update the dataset via PUT call
before_update_trigger: commit_thumbnail_file
# The updated_peripherally tag is a temporary measure to correctly handle any attributes
# which are potentially updated by multiple triggers
updated_peripherally: true
thumbnail_file_to_remove:
# This is only valid on update via a PUT request
creation_action:
type: string
transient: true
exposed: false
description: 'The thumbnail image file previously uploaded to delete. Provide as a string of the file_uuid like: "232934234234234234234270c0ea6c51d604a850558ef2247d0b4"'
before_update_trigger: delete_thumbnail_file
# The updated_peripherally tag is a temporary measure to correctly handle any attributes
# which are potentially updated by multiple triggers
updated_peripherally: true
retraction_reason:
type: string
before_property_update_validators:
- validate_if_retraction_permitted
- validate_sub_status_provided
description: 'Information recorded about why the dataset was retracted.'
sub_status:
type: string
before_property_update_validators:
- validate_if_retraction_permitted
- validate_retraction_reason_provided
- validate_retracted_dataset_sub_status_value
description: 'A sub-status provided to further define the status. The only current allowable value is "Retracted"'
provider_info:
type: string
description: 'Information recorded about the data provider before an analysis pipeline is run on the data.'
dbgap_sra_experiment_url:
type: string
description: 'A URL linking the dataset to the associated uploaded data at dbGaP.'
dbgap_study_url:
generated: true
immutable: true
on_read_trigger: get_creation_action_activity
on_index_trigger: get_creation_action_activity
description: "The activity that was performed."
dataset_type:
before_create_trigger: set_publication_dataset_type
type: string
description: 'A URL linking the dataset to the particular study on dbGap it belongs to'
generated: true
immutable: true
description: "The assay types of this Dataset. Valid values come from UBKG and are queried by schema_manager.get_valueset_dataset_type() using the Ontology API."
publication_date:
type: string
description: 'The date of publication'
Expand Down Expand Up @@ -1012,10 +951,11 @@ ENTITIES:
type: string
description: 'The timestamp of when this entity was last modified or published.'
on_index_trigger: get_last_touch
origin_sample:
origin_samples:
type: json_string
description: 'The Sample ancestor that has the sample_category of "Organ".'
on_index_trigger: get_origin_sample
description: 'The list of Sample ancestors that have the sample_category of "Organ".'
on_read_trigger: get_origin_samples
on_index_trigger: get_origin_samples
next_identifier:
type: string
immutable: true
Expand Down
6 changes: 3 additions & 3 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,12 @@ def get_dataset_direct_descendants(neo4j_driver, uuid, property_key=None, match_
"""


def get_origin_sample(neo4j_driver, uuid):
def get_origin_samples(neo4j_driver, uuid):
result = {}

query = (f"MATCH (e:Entity)-[:WAS_GENERATED_BY|USED*]->(s:Sample) "
f"WHERE e.uuid='{uuid}' and s.sample_category='Organ' "
f"return s AS {record_field_name}")
f"return apoc.coll.toSet(COLLECT(s)) AS {record_field_name}")

logger.info("======get_origin_sample() query======")
logger.info(query)
Expand All @@ -140,7 +140,7 @@ def get_origin_sample(neo4j_driver, uuid):
record = session.read_transaction(_execute_readonly_tx, query)
if record and record[record_field_name]:
# Convert the entity node to dict
result = _node_to_dict(record[record_field_name])
result = _nodes_to_dicts(record[record_field_name])

return result

Expand Down
21 changes: 11 additions & 10 deletions src/schema/schema_triggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,7 @@ def get_last_touch(property_key, normalized_type, user_token, existing_data_dict
return property_key, last_touch


def get_origin_sample(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
def get_origin_samples(property_key, normalized_type, user_token, existing_data_dict, new_data_dict):
"""Trigger event method to grab the ancestors of this entity where entity type is Sample and the sample_category is Organ.
Parameters
Expand Down Expand Up @@ -1763,19 +1763,20 @@ def get_origin_sample(property_key, normalized_type, user_token, existing_data_d
# Return the organ if this is an organ
return property_key, existing_data_dict

origin_sample = None
origin_samples = None
if normalized_type in ["Sample", "Dataset", "Publication"]:
origin_sample = schema_neo4j_queries.get_origin_sample(schema_manager.get_neo4j_driver_instance(),
origin_samples = schema_neo4j_queries.get_origin_samples(schema_manager.get_neo4j_driver_instance(),
existing_data_dict['uuid'])

organ_hierarchy_key, organ_hierarchy_value = get_organ_hierarchy(property_key='organ_hierarchy',
normalized_type=Ontology.ops().entities().SAMPLE,
user_token=user_token,
existing_data_dict=origin_sample,
new_data_dict=new_data_dict)
origin_sample[organ_hierarchy_key] = organ_hierarchy_value
for origin_sample in origin_samples:
organ_hierarchy_key, organ_hierarchy_value = get_organ_hierarchy(property_key='organ_hierarchy',
normalized_type=Ontology.ops().entities().SAMPLE,
user_token=user_token,
existing_data_dict=origin_sample,
new_data_dict=new_data_dict)
origin_sample[organ_hierarchy_key] = organ_hierarchy_value

return property_key, origin_sample
return property_key, origin_samples
except Exception:
logger.error(f"No origin sample found for {normalized_type} with UUID: {existing_data_dict['uuid']}")
return property_key, None
Expand Down
19 changes: 19 additions & 0 deletions test/data/get_ancestors_success_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@
"last_modified_user_email": "[email protected]",
"last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"organ": "BD",
"origin_samples": {
"created_by_user_displayname": "Test User",
"created_by_user_email": "[email protected]",
"created_by_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"created_timestamp": 1681828779121,
"data_access_level": "consortium",
"entity_type": "Sample",
"group_name": "CODCC Testing Group",
"group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
"lab_tissue_sample_id": "Human Blood",
"last_modified_timestamp": 1681828779121,
"last_modified_user_displayname": "Test User",
"last_modified_user_email": "[email protected]",
"last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"organ": "BD",
"sample_category": "Organ",
"sennet_id": "SNT458.VPHX.635",
"uuid": "cf3d0408de9afd703c8bd71808176b38"
},
"sample_category": "Organ",
"sennet_id": "SNT458.VPHX.635",
"uuid": "cf3d0408de9afd703c8bd71808176b38",
Expand Down
64 changes: 64 additions & 0 deletions test/data/get_entity_by_type_success_sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,38 @@
"last_modified_user_email": "[email protected]",
"last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"organ": "BR",
"origin_samples": {
"created_by_user_displayname": "Test User",
"created_by_user_email": "[email protected]",
"created_by_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"created_timestamp": 1681828388360,
"data_access_level": "consortium",
"description": "Sample lab notes",
"entity_type": "Sample",
"group_name": "CODCC Testing Group",
"group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
"image_files": [
{
"description": "Test image",
"file_uuid": "ffff1b46e377b91565ed53464cc8d859",
"filename": "a4fc82ba0010139e33c6209b917ac9c487172222.png"
}
],
"lab_tissue_sample_id": "Human Brain",
"last_modified_timestamp": 1681828388360,
"last_modified_user_displayname": "Test User",
"last_modified_user_email": "[email protected]",
"last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"organ": "BR",
"protocol_url": "dx.doi.org/10.17504/protocols.io.3byl4j398lo5/v1",
"sample_category": "Organ",
"sennet_id": "SNT834.LVJG.639",
"thumbnail_file": {
"file_uuid": "ffffb2c9be7816087e13580e244855c5",
"filename": "image_handler.jpg"
},
"uuid": "3c4fc147a08429f58856779fcde96f42"
},
"protocol_url": "dx.doi.org/10.17504/protocols.io.3byl4j398lo5/v1",
"sample_category": "Organ",
"sennet_id": "SNT834.LVJG.639",
Expand Down Expand Up @@ -75,6 +107,38 @@
"last_modified_user_email": "[email protected]",
"last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"organ": "BR",
"origin_samples": {
"created_by_user_displayname": "Test User",
"created_by_user_email": "[email protected]",
"created_by_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"created_timestamp": 1681828388360,
"data_access_level": "consortium",
"description": "Sample lab notes",
"entity_type": "Sample",
"group_name": "CODCC Testing Group",
"group_uuid": "57192604-18e0-11ed-b79b-972795fc9504",
"image_files": [
{
"description": "Test image",
"file_uuid": "ffff1b46e377b91565ed53464cc8d859",
"filename": "a4fc82ba0010139e33c6209b917ac9c487172222.png"
}
],
"lab_tissue_sample_id": "Human Brain",
"last_modified_timestamp": 1681828388360,
"last_modified_user_displayname": "Test User",
"last_modified_user_email": "[email protected]",
"last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d",
"organ": "BR",
"protocol_url": "dx.doi.org/10.17504/protocols.io.3byl4j398lo5/v1",
"sample_category": "Organ",
"sennet_id": "SNT834.LVJG.639",
"thumbnail_file": {
"file_uuid": "ffffb2c9be7816087e13580e244855c5",
"filename": "image_handler.jpg"
},
"uuid": "4aff569f5d61477abfe5c40364d04a1c"
},
"protocol_url": "dx.doi.org/10.17504/protocols.io.3byl4j398lo5/v1",
"sample_category": "Organ",
"sennet_id": "SNT834.LVJG.639",
Expand Down
Loading