diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 8e7db7a..4cdafdb 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -355,7 +355,7 @@ ENTITIES: derivation: source: true target: true - properties: + properties: &shared_dataset_properties <<: *shared_properties <<: *shared_entity_properties <<: *doi_properties @@ -597,10 +597,11 @@ ENTITIES: type: string description: 'The timestamp of when this entity was last modified or published.' on_index_trigger: get_last_touch - origin_sample: + origin_samples: type: json_string - description: 'The Sample ancestor that has the sample_category of "Organ".' - on_index_trigger: get_origin_sample + description: 'The Sample ancestors that have the sample_category of "Organ".' + on_read_trigger: get_origin_samples + on_index_trigger: get_origin_samples assigned_to_group_name: type: string description: The group who is responsible for the next step in the ingest process @@ -669,87 +670,25 @@ ENTITIES: source: true target: true properties: - error_message: - type: string - description: "An open text field that holds the last error message that arose from pipeline validation or analysis." - dataset_type: - before_create_trigger: set_publication_dataset_type - type: string - generated: true - immutable: true - description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API." + <<: *shared_dataset_properties title: type: string description: "The title of the publication." required_on_create: true # Only required for create via POST, not update via PUT - pipeline_message: - #todo: where is this attribute sourced from? Is it stored in the database? <- Not in neo4j - type: string - ingest_metadata: - type: json_string # dict - description: "The metadata returned from the processing at data submission time." 
- run_id: - type: string - ingest_id: - type: string - # A user who is a member of multiple groups HAS to send in the group_uuid - group_uuid: - type: string - immutable: true - description: "The uuid of globus group which the user who created this entity is a member of. This is required on Create/POST if the user creating the Donor is a member of more than one write group. This property cannot be set via PUT (only on Create/POST)." - before_create_trigger: set_group_uuid #method that, if group_uuid is not already set looks for membership in a single "data provider" group and sets to that. Otherwise if not set and no single "provider group" membership throws error - # No like image and metadata files handling for Donor/Sample - # Dataset has only one thumbnail file - thumbnail_file: - generated: true - type: json_string - description: "The dataset thumbnail file detail. Stored in db as a stringfied json, e.g., {'filename': 'thumbnail.jpg', 'file_uuid': 'dadasdasdadda'}" - # The updated_peripherally tag is a temporary measure to correctly handle any attributes - # which are potentially updated by multiple triggers - updated_peripherally: true - thumbnail_file_to_add: - type: json_string - transient: true - exposed: false - description: 'Just a temporary file id. Provide as a json object with an temp_file_id like {"temp_file_id":"dzevgd6xjs4d5grmcp4n"}' - before_create_trigger: commit_thumbnail_file - # This before_update_trigger with the same commit process can be used by ingest-api to update the dataset via PUT call - before_update_trigger: commit_thumbnail_file - # The updated_peripherally tag is a temporary measure to correctly handle any attributes - # which are potentially updated by multiple triggers - updated_peripherally: true - thumbnail_file_to_remove: - # This is only valid on update via a PUT request + creation_action: type: string transient: true - exposed: false - description: 'The thumbnail image file previously uploaded to delete. 
Provide as a string of the file_uuid like: "232934234234234234234270c0ea6c51d604a850558ef2247d0b4"' - before_update_trigger: delete_thumbnail_file - # The updated_peripherally tag is a temporary measure to correctly handle any attributes - # which are potentially updated by multiple triggers - updated_peripherally: true - retraction_reason: - type: string - before_property_update_validators: - - validate_if_retraction_permitted - - validate_sub_status_provided - description: 'Information recorded about why a the dataset was retracted.' - sub_status: - type: string - before_property_update_validators: - - validate_if_retraction_permitted - - validate_retraction_reason_provided - - validate_retracted_dataset_sub_status_value - description: 'A sub-status provided to further define the status. The only current allowable value is "Retracted"' - provider_info: - type: string - description: 'Information recorded about the data provider before an analysis pipeline is run on the data.' - dbgap_sra_experiment_url: - type: string - description: 'A URL linking the dataset to the associated uploaded data at dbGaP.' - dbgap_study_url: + generated: true + immutable: true + on_read_trigger: get_creation_action_activity + on_index_trigger: get_creation_action_activity + description: "The activity that was performed." + dataset_type: + before_create_trigger: set_publication_dataset_type type: string - description: 'A URL linking the dataset to the particular study on dbGap it belongs to' + generated: true + immutable: true + description: "The assay types of this Dataset. Valid values are from UBKG are queried by schema_manager.get_valueset_dataset_type() using the Ontology API." publication_date: type: string description: 'The date of publication' @@ -1012,10 +951,11 @@ ENTITIES: type: string description: 'The timestamp of when this entity was last modified or published.' 
on_index_trigger: get_last_touch - origin_sample: + origin_samples: type: json_string - description: 'The Sample ancestor that has the sample_category of "Organ".' - on_index_trigger: get_origin_sample + description: 'The list of Sample ancestors that have the sample_category of "Organ".' + on_read_trigger: get_origin_samples + on_index_trigger: get_origin_samples next_identifier: type: string immutable: true diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 8c60698..bc51987 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -126,12 +126,12 @@ def get_dataset_direct_descendants(neo4j_driver, uuid, property_key=None, match_ """ -def get_origin_sample(neo4j_driver, uuid): +def get_origin_samples(neo4j_driver, uuid): result = {} query = (f"MATCH (e:Entity)-[:WAS_GENERATED_BY|USED*]->(s:Sample) " f"WHERE e.uuid='{uuid}' and s.sample_category='Organ' " - f"return s AS {record_field_name}") + f"return apoc.coll.toSet(COLLECT(s)) AS {record_field_name}") logger.info("======get_origin_sample() query======") logger.info(query) @@ -140,7 +140,7 @@ def get_origin_sample(neo4j_driver, uuid): record = session.read_transaction(_execute_readonly_tx, query) if record and record[record_field_name]: # Convert the entity node to dict - result = _node_to_dict(record[record_field_name]) + result = _nodes_to_dicts(record[record_field_name]) return result diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index ed2baca..a20b25b 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1734,7 +1734,7 @@ def get_last_touch(property_key, normalized_type, user_token, existing_data_dict return property_key, last_touch -def get_origin_sample(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_origin_samples(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): """Trigger event method to grab the ancestor 
of this entity where entity type is Sample and the sample_category is Organ. Parameters @@ -1763,19 +1763,20 @@ def get_origin_sample(property_key, normalized_type, user_token, existing_data_d # Return the organ if this is an organ return property_key, existing_data_dict - origin_sample = None + origin_samples = None if normalized_type in ["Sample", "Dataset", "Publication"]: - origin_sample = schema_neo4j_queries.get_origin_sample(schema_manager.get_neo4j_driver_instance(), + origin_samples = schema_neo4j_queries.get_origin_samples(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) - organ_hierarchy_key, organ_hierarchy_value = get_organ_hierarchy(property_key='organ_hierarchy', - normalized_type=Ontology.ops().entities().SAMPLE, - user_token=user_token, - existing_data_dict=origin_sample, - new_data_dict=new_data_dict) - origin_sample[organ_hierarchy_key] = organ_hierarchy_value + for origin_sample in origin_samples: + organ_hierarchy_key, organ_hierarchy_value = get_organ_hierarchy(property_key='organ_hierarchy', + normalized_type=Ontology.ops().entities().SAMPLE, + user_token=user_token, + existing_data_dict=origin_sample, + new_data_dict=new_data_dict) + origin_sample[organ_hierarchy_key] = organ_hierarchy_value - return property_key, origin_sample + return property_key, origin_samples except Exception: logger.error(f"No origin sample found for {normalized_type} with UUID: {existing_data_dict['uuid']}") return property_key, None diff --git a/test/data/get_ancestors_success_dataset.json b/test/data/get_ancestors_success_dataset.json index bfe7718..f665958 100644 --- a/test/data/get_ancestors_success_dataset.json +++ b/test/data/get_ancestors_success_dataset.json @@ -89,6 +89,25 @@ "last_modified_user_email": "TESTUSER@example.com", "last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d", "organ": "BD", + "origin_samples": { + "created_by_user_displayname": "Test User", + "created_by_user_email": "TESTUSER@example.com", + 
"created_by_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d", + "created_timestamp": 1681828779121, + "data_access_level": "consortium", + "entity_type": "Sample", + "group_name": "CODCC Testing Group", + "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504", + "lab_tissue_sample_id": "Human Blood", + "last_modified_timestamp": 1681828779121, + "last_modified_user_displayname": "Test User", + "last_modified_user_email": "TESTUSER@example.com", + "last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d", + "organ": "BD", + "sample_category": "Organ", + "sennet_id": "SNT458.VPHX.635", + "uuid": "cf3d0408de9afd703c8bd71808176b38" + }, "sample_category": "Organ", "sennet_id": "SNT458.VPHX.635", "uuid": "cf3d0408de9afd703c8bd71808176b38", diff --git a/test/data/get_entity_by_type_success_sample.json b/test/data/get_entity_by_type_success_sample.json index 8c27abb..3dee0b5 100644 --- a/test/data/get_entity_by_type_success_sample.json +++ b/test/data/get_entity_by_type_success_sample.json @@ -23,6 +23,38 @@ "last_modified_user_email": "TESTUSER@example.com", "last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d", "organ": "BR", + "origin_samples": { + "created_by_user_displayname": "Test User", + "created_by_user_email": "TESTUSER@example.com", + "created_by_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d", + "created_timestamp": 1681828388360, + "data_access_level": "consortium", + "description": "Sample lab notes", + "entity_type": "Sample", + "group_name": "CODCC Testing Group", + "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504", + "image_files": [ + { + "description": "Test image", + "file_uuid": "ffff1b46e377b91565ed53464cc8d859", + "filename": "a4fc82ba0010139e33c6209b917ac9c487172222.png" + } + ], + "lab_tissue_sample_id": "Human Brain", + "last_modified_timestamp": 1681828388360, + "last_modified_user_displayname": "Test User", + "last_modified_user_email": "TESTUSER@example.com", + "last_modified_user_sub": 
"9e5b670f-228d-433c-bb86-a3228d5ca49d", + "organ": "BR", + "protocol_url": "dx.doi.org/10.17504/protocols.io.3byl4j398lo5/v1", + "sample_category": "Organ", + "sennet_id": "SNT834.LVJG.639", + "thumbnail_file": { + "file_uuid": "ffffb2c9be7816087e13580e244855c5", + "filename": "image_handler.jpg" + }, + "uuid": "3c4fc147a08429f58856779fcde96f42" + }, "protocol_url": "dx.doi.org/10.17504/protocols.io.3byl4j398lo5/v1", "sample_category": "Organ", "sennet_id": "SNT834.LVJG.639", @@ -75,6 +107,38 @@ "last_modified_user_email": "TESTUSER@example.com", "last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d", "organ": "BR", + "origin_samples": { + "created_by_user_displayname": "Test User", + "created_by_user_email": "TESTUSER@example.com", + "created_by_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d", + "created_timestamp": 1681828388360, + "data_access_level": "consortium", + "description": "Sample lab notes", + "entity_type": "Sample", + "group_name": "CODCC Testing Group", + "group_uuid": "57192604-18e0-11ed-b79b-972795fc9504", + "image_files": [ + { + "description": "Test image", + "file_uuid": "ffff1b46e377b91565ed53464cc8d859", + "filename": "a4fc82ba0010139e33c6209b917ac9c487172222.png" + } + ], + "lab_tissue_sample_id": "Human Brain", + "last_modified_timestamp": 1681828388360, + "last_modified_user_displayname": "Test User", + "last_modified_user_email": "TESTUSER@example.com", + "last_modified_user_sub": "9e5b670f-228d-433c-bb86-a3228d5ca49d", + "organ": "BR", + "protocol_url": "dx.doi.org/10.17504/protocols.io.3byl4j398lo5/v1", + "sample_category": "Organ", + "sennet_id": "SNT834.LVJG.639", + "thumbnail_file": { + "file_uuid": "ffffb2c9be7816087e13580e244855c5", + "filename": "image_handler.jpg" + }, + "uuid": "4aff569f5d61477abfe5c40364d04a1c" + }, "protocol_url": "dx.doi.org/10.17504/protocols.io.3byl4j398lo5/v1", "sample_category": "Organ", "sennet_id": "SNT834.LVJG.639",