diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 41882d1..0dcec46 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -597,10 +597,10 @@ ENTITIES: type: string description: 'The timestamp of when this entity was last modified or published.' on_index_trigger: get_last_touch - origin_sample: + origin_samples: type: json_string - description: 'The Sample ancestor that has the sample_category of "Organ".' - on_index_trigger: get_origin_sample + description: 'The Sample ancestors that have the sample_category of "Organ".' + on_index_trigger: get_origin_samples assigned_to_group_name: type: string description: The group who is responsible for the next step in the ingest process diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 8c60698..bc51987 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -126,12 +126,12 @@ def get_dataset_direct_descendants(neo4j_driver, uuid, property_key=None, match_ """ -def get_origin_sample(neo4j_driver, uuid): +def get_origin_samples(neo4j_driver, uuid): result = {} query = (f"MATCH (e:Entity)-[:WAS_GENERATED_BY|USED*]->(s:Sample) " f"WHERE e.uuid='{uuid}' and s.sample_category='Organ' " - f"return s AS {record_field_name}") + f"return apoc.coll.toSet(COLLECT(s)) AS {record_field_name}") logger.info("======get_origin_sample() query======") logger.info(query) @@ -140,7 +140,7 @@ def get_origin_sample(neo4j_driver, uuid): record = session.read_transaction(_execute_readonly_tx, query) if record and record[record_field_name]: # Convert the entity node to dict - result = _node_to_dict(record[record_field_name]) + result = _nodes_to_dicts(record[record_field_name]) return result diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index ed2baca..a20b25b 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1734,7 +1734,7 @@ def 
get_last_touch(property_key, normalized_type, user_token, existing_data_dict return property_key, last_touch -def get_origin_sample(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_origin_samples(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): """Trigger event method to grab the ancestor of this entity where entity type is Sample and the sample_category is Organ. Parameters @@ -1763,19 +1763,20 @@ def get_origin_sample(property_key, normalized_type, user_token, existing_data_d # Return the organ if this is an organ return property_key, existing_data_dict - origin_sample = None + origin_samples = None if normalized_type in ["Sample", "Dataset", "Publication"]: - origin_sample = schema_neo4j_queries.get_origin_sample(schema_manager.get_neo4j_driver_instance(), + origin_samples = schema_neo4j_queries.get_origin_samples(schema_manager.get_neo4j_driver_instance(), existing_data_dict['uuid']) - organ_hierarchy_key, organ_hierarchy_value = get_organ_hierarchy(property_key='organ_hierarchy', - normalized_type=Ontology.ops().entities().SAMPLE, - user_token=user_token, - existing_data_dict=origin_sample, - new_data_dict=new_data_dict) - origin_sample[organ_hierarchy_key] = organ_hierarchy_value + for origin_sample in origin_samples: + organ_hierarchy_key, organ_hierarchy_value = get_organ_hierarchy(property_key='organ_hierarchy', + normalized_type=Ontology.ops().entities().SAMPLE, + user_token=user_token, + existing_data_dict=origin_sample, + new_data_dict=new_data_dict) + origin_sample[organ_hierarchy_key] = organ_hierarchy_value - return property_key, origin_sample + return property_key, origin_samples except Exception: logger.error(f"No origin sample found for {normalized_type} with UUID: {existing_data_dict['uuid']}") return property_key, None