From 73df87d2f5553002c8472c6a1f59c06405427015 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Wed, 2 Aug 2023 14:34:32 -0400
Subject: [PATCH 01/14] Port over methods from hm - #116

---
 src/lib/__init__.py | 36 +++
 src/lib/file.py | 23 +-
 src/lib/ontology.py | 8 +-
 src/routes/entity_CRUD/__init__.py | 206 +++++++++++++++++-
 src/routes/validation/ingest_validation_tools | 2 +-
 5 files changed, 267 insertions(+), 8 deletions(-)

diff --git a/src/lib/__init__.py b/src/lib/__init__.py
index e69de29b..7a1f560b 100644
--- a/src/lib/__init__.py
+++ b/src/lib/__init__.py
@@ -0,0 +1,36 @@
+from flask import Blueprint, jsonify, request, Response, current_app, abort, json
+import urllib.request
+from hubmap_commons import file_helper as commons_file_helper
+
+def get_globus_url(data_access_level, group_name, uuid):
+    globus_server_uuid = None
+    dir_path = " "
+    # public access
+    if data_access_level == "public":
+        globus_server_uuid = current_app.config['GLOBUS_PUBLIC_ENDPOINT_UUID']
+        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['PUBLIC_DATA_SUBDIR'])
+        dir_path = dir_path + access_dir + "/"
+    # consortium access
+    elif data_access_level == 'consortium':
+        globus_server_uuid = current_app.config['GLOBUS_CONSORTIUM_ENDPOINT_UUID']
+        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['CONSORTIUM_DATA_SUBDIR'])
+        dir_path = dir_path + access_dir + group_name + "/"
+    # protected access
+    elif data_access_level == 'protected':
+        globus_server_uuid = current_app.config['GLOBUS_PROTECTED_ENDPOINT_UUID']
+        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['PROTECTED_DATA_SUBDIR'])
+        dir_path = dir_path + access_dir + group_name + "/"
+
+    if globus_server_uuid is not None:
+        dir_path = dir_path + uuid + "/"
+        dir_path = urllib.parse.quote(dir_path, safe='')
+
+        # https://app.globus.org/file-manager?origin_id=28bb03c-a87d-4dd7-a661-7ea2fb6ea631&origin_path=2%FIEC%20Testing%20Group%20F03584b3d0f8b46de1b29f04be1568779%2F
+        globus_url = commons_file_helper.ensureTrailingSlash(current_app.config[
+            'GLOBUS_APP_BASE_URL']) + "file-manager?origin_id=" + globus_server_uuid + "&origin_path=" + dir_path
+
+    else:
+        globus_url = ""
+    if uuid is None:
+        globus_url = ""
+    return globus_url
\ No newline at end of file
diff --git a/src/lib/file.py b/src/lib/file.py
index 28559ab8..a8e703b4 100644
--- a/src/lib/file.py
+++ b/src/lib/file.py
@@ -1,11 +1,11 @@
 import csv
+import os
 import logging
 from hubmap_commons import file_helper as commons_file_helper
 from flask import current_app, request
 from atlas_consortia_commons.rest import *
 from werkzeug import utils
 from collections import OrderedDict
-
 from lib.file_upload_helper import UploadFileHelper
 
 logger = logging.getLogger(__name__)
@@ -80,4 +80,23 @@ def ln_err(error: str, row: int = None, column: str = None):
         'column': column,
         'error': error,
         'row': row
-    }
\ No newline at end of file
+    }
+
+
+def files_exist(uuid, data_access_level):
+    if not uuid or not data_access_level:
+        return False
+    if data_access_level == "public":
+        absolute_path = commons_file_helper.ensureTrailingSlashURL(current_app.config['GLOBUS_PUBLIC_ENDPOINT_FILEPATH'])
+    # consortium access
+    elif data_access_level == 'consortium':
+        absolute_path = commons_file_helper.ensureTrailingSlashURL(current_app.config['GLOBUS_CONSORTIUM_ENDPOINT_FILEPATH'])
+    # protected access
+    elif data_access_level == 'protected':
+        absolute_path = commons_file_helper.ensureTrailingSlashURL(current_app.config['GLOBUS_PROTECTED_ENDPOINT_FILEPATH'])
+
+    file_path = absolute_path + uuid
+    if os.path.exists(file_path) and os.path.isdir(file_path) and os.listdir(file_path):
+        return True
+    else:
+        return False
\ No newline at end of file
diff --git a/src/lib/ontology.py b/src/lib/ontology.py
index c6aa1785..bbba865b 100644
--- a/src/lib/ontology.py
+++ b/src/lib/ontology.py
@@ -38,7 +38,8 @@ def specimen_categories(in_enum: bool = False, as_data_dict: bool = False):
                              obj_type=_get_obj_type(in_enum, as_data_dict))
 
 
-def organ_types(in_enum: bool = False, as_data_dict: bool = False):
+def organ_types(in_enum: bool = False, as_data_dict: bool = False,
+                prop_callback=to_snake_case_upper, data_as_val=False):
     return _build_enum_class('OrganTypes', current_app.ubkg.organ_types, key='rui_code', val_key='term',
                              obj_type=_get_obj_type(in_enum, as_data_dict))
 
@@ -95,8 +96,9 @@ def specimen_categories(as_arr: bool = False, cb=str, as_data_dict: bool = False
         return Ontology._as_list_or_class(specimen_categories(as_arr, as_data_dict), as_arr, cb)
 
     @staticmethod
-    def organ_types(as_arr: bool = False, cb=str, as_data_dict: bool = False):
-        return Ontology._as_list_or_class(organ_types(as_arr, as_data_dict), as_arr, cb)
+    def organ_types(as_arr: bool = False, cb=str, as_data_dict: bool = False, prop_callback=to_snake_case_upper, data_as_val=False):
+        return Ontology._as_list_or_class(organ_types(as_arr, as_data_dict, prop_callback,
+                                                      data_as_val=data_as_val), as_arr, cb)
 
     @staticmethod
     def source_types(as_arr: bool = False, cb=str, as_data_dict: bool = False):
diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index c52bfbee..ef30f266 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -3,6 +3,7 @@
 import requests
 import os
 import re
+import datetime
 import urllib.request
 import yaml
 from hubmap_sdk import EntitySdk
@@ -18,6 +19,7 @@
 from atlas_consortia_commons.object import includes
 
 from lib.file_upload_helper import UploadFileHelper
+from lib import get_globus_url
 
 entity_CRUD_blueprint = Blueprint('entity_CRUD', __name__)
 
@@ -29,8 +31,7 @@
 from routes.entity_CRUD.constraints_helper import *
 from routes.auth import get_auth_header, get_auth_header_dict
 from lib.ontology import Ontology, enum_val_lower, get_organ_types_ep, get_assay_types_ep
-from lib.file import get_csv_records, get_base_path, check_upload, ln_err
-
+from lib.file import get_csv_records, get_base_path, check_upload, ln_err, files_exist
 
 
 @entity_CRUD_blueprint.route('/datasets', methods=['POST'])
@@ -550,6 +551,207 @@ def update_ingest_status():
         logger.error(e, exc_info=True)
         return Response("Unexpected error while saving dataset: " + str(e), 500)
 
+
+
+def run_query(query, results, i):
+    logger.info(query)
+    with current_app.neo4j_driver_instance.session() as session:
+        results[i] = session.run(query).data()
+
+
+"""
+Description
+"""
+@app.route('/datasets/data-status', methods=['GET'])
+def dataset_data_status():
+    primary_assays_url = current_app.config['UBKG_WEBSERVICE_URL'] + 'assaytype?application_context=HUBMAP&primary=true'
+    alt_assays_url = current_app.config['UBKG_WEBSERVICE_URL'] + 'assaytype?application_context=HUBMAP&primary=false'
+    primary_assay_types_list = requests.get(primary_assays_url).json().get("result")
+    alt_assay_types_list = requests.get(alt_assays_url).json().get("result")
+    assay_types_dict = {item["name"].strip(): item for item in primary_assay_types_list + alt_assay_types_list}
+    #organ_types_url = current_app.config['UBKG_WEBSERVICE_URL'] + 'organs/by-code?application_context=HUBMAP'
+    organ_types_dict = Ontology.organ_types(as_data_dict=True, prop_callback=None, data_as_val=True) #requests.get(organ_types_url).json()
+    all_datasets_query = (
+        "MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(ancestor) "
+        "RETURN ds.uuid AS uuid, ds.group_name AS group_name, ds.data_types AS data_types, "
+        "ds.sennet_id AS sennet_id, ds.lab_dataset_id AS provider_experiment_id, ds.status AS status, "
+        "ds.last_modified_timestamp AS last_touch, ds.data_access_level AS data_access_level, "
+        "COALESCE(ds.contributors IS NOT NULL) AS has_contributors, COALESCE(ds.contacts IS NOT NULL) AS has_contacts, "
+        "ancestor.entity_type AS ancestor_entity_type"
+    )
+
+    organ_query = (
+        "MATCH (ds:Dataset)<-[*]-(o:Sample {sample_category: 'organ'}) "
+        "WHERE (ds)<-[:ACTIVITY_OUTPUT]-(:Activity) "
+        "RETURN DISTINCT ds.uuid AS uuid, o.organ AS organ, o.sennet_id as organ_sennet_id, o.uuid as organ_uuid "
+    )
+
+    donor_query = (
+        "MATCH (ds:Dataset)<-[*]-(dn:Donor) "
+        "WHERE (ds)<-[:ACTIVITY_OUTPUT]-(:Activity) "
+        "RETURN DISTINCT ds.uuid AS uuid, "
+        "COLLECT(DISTINCT dn.sennet_id) AS donor_sennet_id, COLLECT(DISTINCT dn.submission_id) AS donor_submission_id, "
+        "COLLECT(DISTINCT dn.lab_donor_id) AS donor_lab_id, COALESCE(dn.metadata IS NOT NULL) AS has_metadata"
+    )
+
+    descendant_datasets_query = (
+        "MATCH (dds:Dataset)<-[*]-(ds:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(:Sample) "
+        "RETURN DISTINCT ds.uuid AS uuid, COLLECT(DISTINCT dds.sennet_id) AS descendant_datasets"
+    )
+
+    upload_query = (
+        "MATCH (u:Upload)<-[:IN_UPLOAD]-(ds) "
+        "RETURN DISTINCT ds.uuid AS uuid, COLLECT(DISTINCT u.sennet_id) AS upload"
+    )
+
+    has_rui_query = (
+        "MATCH (ds:Dataset) "
+        "WHERE (ds)<-[:ACTIVITY_OUTPUT]-(:Activity) "
+        "WITH ds, [(ds)<-[*]-(s:Sample) | s.rui_location] AS rui_locations "
+        "RETURN ds.uuid AS uuid, any(rui_location IN rui_locations WHERE rui_location IS NOT NULL) AS has_rui_info"
+    )
+
+    displayed_fields = [
+        "sennet_id", "group_name", "status", "organ", "provider_experiment_id", "last_touch", "has_contacts",
+        "has_contributors", "data_types", "donor_sennet_id", "donor_submission_id", "donor_lab_id",
+        "has_metadata", "descendant_datasets", "upload", "has_rui_info", "globus_url", "portal_url", "ingest_url",
+        "has_data", "organ_sennet_id"
+    ]
+
+    queries = [all_datasets_query, organ_query, donor_query, descendant_datasets_query,
+               upload_query, has_rui_query]
+    results = [None] * len(queries)
+    threads = []
+    for i, query in enumerate(queries):
+        thread = Thread(target=run_query, args=(query, results, i))
+        thread.start()
+        threads.append(thread)
+    for thread in threads:
+        thread.join()
+    output_dict = {}
+    # Here we specifically indexed the values in 'results' in case certain threads completed out of order
+    all_datasets_result = results[0]
+    organ_result = results[1]
+    donor_result = results[2]
+    descendant_datasets_result = results[3]
+    upload_result = results[4]
+    has_rui_result = results[5]
+
+    for dataset in all_datasets_result:
+        output_dict[dataset['uuid']] = dataset
+    for dataset in organ_result:
+        if output_dict.get(dataset['uuid']):
+            output_dict[dataset['uuid']]['organ'] = dataset['organ']
+            output_dict[dataset['uuid']]['organ_sennet_id'] = dataset['organ_sennet_id']
+            output_dict[dataset['uuid']]['organ_uuid'] = dataset['organ_uuid']
+    for dataset in donor_result:
+        if output_dict.get(dataset['uuid']):
+            output_dict[dataset['uuid']]['donor_sennet_id'] = dataset['donor_sennet_id']
+            output_dict[dataset['uuid']]['donor_submission_id'] = dataset['donor_submission_id']
+            output_dict[dataset['uuid']]['donor_lab_id'] = dataset['donor_lab_id']
+            output_dict[dataset['uuid']]['has_metadata'] = dataset['has_metadata']
+    for dataset in descendant_datasets_result:
+        if output_dict.get(dataset['uuid']):
+            output_dict[dataset['uuid']]['descendant_datasets'] = dataset['descendant_datasets']
+    for dataset in upload_result:
+        if output_dict.get(dataset['uuid']):
+            output_dict[dataset['uuid']]['upload'] = dataset['upload']
+    for dataset in has_rui_result:
+        if output_dict.get(dataset['uuid']):
+            output_dict[dataset['uuid']]['has_rui_info'] = dataset['has_rui_info']
+
+    combined_results = []
+    for uuid in output_dict:
+        combined_results.append(output_dict[uuid])
+
+    for dataset in combined_results:
+        globus_url = get_globus_url(dataset.get('data_access_level'), dataset.get('group_name'), dataset.get('uuid'))
+        dataset['globus_url'] = globus_url
+        portal_url = commons_file_helper.ensureTrailingSlashURL(current_app.config['PORTAL_URL']) + 'dataset' + '/' + dataset[
+            'uuid']
+        dataset['portal_url'] = portal_url
+        ingest_url = commons_file_helper.ensureTrailingSlashURL(current_app.config['INGEST_URL']) + 'dataset' + '/' + dataset[
+            'uuid']
+        dataset['ingest_url'] = ingest_url
+        if dataset.get('organ_uuid'):
+            organ_portal_url = commons_file_helper.ensureTrailingSlashURL(current_app.config['PORTAL_URL']) + 'sample' + '/' + dataset['organ_uuid']
+            dataset['organ_portal_url'] = organ_portal_url
+        else:
+            dataset['organ_portal_url'] = ""
+        dataset['last_touch'] = str(datetime.datetime.utcfromtimestamp(dataset['last_touch']/1000))
+        if dataset.get('ancestor_entity_type').lower() != "dataset":
+            dataset['is_primary'] = "true"
+        else:
+            dataset['is_primary'] = "false"
+        has_data = files_exist(dataset.get('uuid'), dataset.get('data_access_level'))
+        dataset['has_data'] = has_data
+
+        for prop in dataset:
+            if isinstance(dataset[prop], list):
+                dataset[prop] = ", ".join(dataset[prop])
+            if isinstance(dataset[prop], (bool, int)):
+                dataset[prop] = str(dataset[prop])
+            if dataset[prop] and dataset[prop][0] == "[" and dataset[prop][-1] == "]":
+                dataset[prop] = dataset[prop].replace("'",'"')
+                dataset[prop] = json.loads(dataset[prop])
+                dataset[prop] = dataset[prop][0]
+            if dataset[prop] is None:
+                dataset[prop] = " "
+        if dataset.get('data_types') and dataset.get('data_types') in assay_types_dict:
+            dataset['data_types'] = assay_types_dict[dataset['data_types']]['description'].strip()
+        for field in displayed_fields:
+            if dataset.get(field) is None:
+                dataset[field] = " "
+        if dataset.get('organ') and dataset['organ'].upper() not in ['HT', 'LV', 'LN', 'RK', 'LK']:
+            dataset['has_rui_info'] = "not-applicable"
+        if dataset.get('organ') and dataset.get('organ') in organ_types_dict:
+            dataset['organ'] = organ_types_dict[dataset['organ']]
+
+    return jsonify(combined_results)
+
+
+"""
+Description
+"""
+@app.route('/uploads/data-status', methods=['GET'])
+def upload_data_status():
+    all_uploads_query = (
+        "MATCH (up:Upload) "
+        "OPTIONAL MATCH (up)<-[:IN_UPLOAD]-(ds:Dataset) "
+        "RETURN up.uuid AS uuid, up.group_name AS group_name, up.sennet_id AS sennet_id, up.status AS status, "
+        "up.title AS title, COLLECT(DISTINCT ds.uuid) AS datasets "
+    )
+
+    displayed_fields = [
+        "uuid", "group_name", "sennet_id", "status", "title", "datasets"
+    ]
+
+    with current_app.neo4j_driver_instance.session() as session:
+        results = session.run(all_uploads_query).data()
+        for upload in results:
+            globus_url = get_globus_url('protected', upload.get('group_name'), upload.get('uuid'))
+            upload['globus_url'] = globus_url
+            ingest_url = commons_file_helper.ensureTrailingSlashURL(current_app.config['INGEST_URL']) + 'upload' + '/' + upload[
+                'uuid']
+            upload['ingest_url'] = ingest_url
+            for prop in upload:
+                if isinstance(upload[prop], list):
+                    upload[prop] = ", ".join(upload[prop])
+                if isinstance(upload[prop], (bool, int)):
+                    upload[prop] = str(upload[prop])
+                if upload[prop] and upload[prop][0] == "[" and upload[prop][-1] == "]":
+                    upload[prop] = upload[prop].replace("'",'"')
+                    upload[prop] = json.loads(upload[prop])
+                    upload[prop] = upload[prop][0]
+                if upload[prop] is None:
+                    upload[prop] = " "
+            for field in displayed_fields:
+                if upload.get(field) is None:
+                    upload[field] = " "
+    # TODO: Once url parameters are implemented in the front-end for the data-status dashboard, we'll need to return a
+    # TODO: link to the datasets page only displaying datasets belonging to a given upload.
+    return jsonify(results)
+
+
 def _get_status_code__by_priority(codes):
     if StatusCodes.SERVER_ERR in codes:
         return StatusCodes.SERVER_ERR
diff --git a/src/routes/validation/ingest_validation_tools b/src/routes/validation/ingest_validation_tools
index 51e20326..07185f27 160000
--- a/src/routes/validation/ingest_validation_tools
+++ b/src/routes/validation/ingest_validation_tools
@@ -1 +1 @@
-Subproject commit 51e20326ff48e1a230c79cd4baa6c9dafb08fe45
+Subproject commit 07185f27b24288207ec6297f999471190d807c9f

From 734da6a29a5783eb1a924009ee780c8f89da6c85 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Thu, 3 Aug 2023 08:49:18 -0400
Subject: [PATCH 02/14] Resolve port - #116

---
 src/routes/entity_CRUD/__init__.py | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index 88099532..4b777792 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -3,11 +3,8 @@
 import requests
 import os
 import re
-<<<<<<< HEAD
 import datetime
-=======
 from typing import List
->>>>>>> dev-integrate
 import urllib.request
 import yaml
 from hubmap_sdk import EntitySdk
@@ -24,11 +21,8 @@
 from atlas_consortia_commons.object import includes
 
 from lib.file_upload_helper import UploadFileHelper
-<<<<<<< HEAD
 from lib import get_globus_url
-=======
 from lib.datacite_doi_helper import DataCiteDoiHelper
->>>>>>> dev-integrate
 
 entity_CRUD_blueprint = Blueprint('entity_CRUD', __name__)
 
@@ -535,15 +529,10 @@ def run_query(query, results, i):
 """
 Description
 """
-@app.route('/datasets/data-status', methods=['GET'])
+@entity_CRUD_blueprint.route('/datasets/data-status', methods=['GET'])
 def dataset_data_status():
-    primary_assays_url = current_app.config['UBKG_WEBSERVICE_URL'] + 'assaytype?application_context=HUBMAP&primary=true'
-    alt_assays_url = current_app.config['UBKG_WEBSERVICE_URL'] + 'assaytype?application_context=HUBMAP&primary=false'
-    primary_assay_types_list = requests.get(primary_assays_url).json().get("result")
-    alt_assay_types_list = requests.get(alt_assays_url).json().get("result")
-    assay_types_dict = {item["name"].strip(): item for item in primary_assay_types_list + alt_assay_types_list}
-    #organ_types_url = current_app.config['UBKG_WEBSERVICE_URL'] + 'organs/by-code?application_context=HUBMAP'
-    organ_types_dict = Ontology.organ_types(as_data_dict=True, prop_callback=None, data_as_val=True) #requests.get(organ_types_url).json()
+    assay_types_dict = Ontology.assay_types(prop_callback=None, as_data_dict=True)
+    organ_types_dict = Ontology.organ_types(as_data_dict=True, prop_callback=None, data_as_val=True)
     all_datasets_query = (
         "MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(ancestor) "
         "RETURN ds.uuid AS uuid, ds.group_name AS group_name, ds.data_types AS data_types, "
@@ -686,7 +675,7 @@ def dataset_data_status():
 """
 Description
 """
-@app.route('/uploads/data-status', methods=['GET'])
+@entity_CRUD_blueprint.route('/uploads/data-status', methods=['GET'])
 def upload_data_status():
     all_uploads_query = (
         "MATCH (up:Upload) "

From 91bd58784253e4f347634dc2505953a071299201 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Thu, 3 Aug 2023 11:46:31 -0400
Subject: [PATCH 03/14] Add /data-ingest-board-login from HM - #116

---
 src/instance/app.cfg.example | 1 +
 src/routes/auth/__init__.py | 84 ++++++++++++++++++------------
 src/routes/entity_CRUD/__init__.py | 2 +-
 3 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example
index 84d1d6e5..e8f86e1f 100644
--- a/src/instance/app.cfg.example
+++ b/src/instance/app.cfg.example
@@ -20,6 +20,7 @@ GLOBUS_CLIENT_APP_NAME = 'SenNet Data Portal'
 # Point to remote URL for testing and production deployment
 GLOBUS_CLIENT_APP_URI = 'https://data.dev.sennetconsortium.org/'
+DATA_INGEST_BOARD_APP_URI = 'http://localhost:3001/'
 
 GLOBUS_BASE_FILE_USER_NAME = 'hive'
 GLOBUS_ADMIN_FILE_USER_NAME = 'shirey'
diff --git a/src/routes/auth/__init__.py b/src/routes/auth/__init__.py
index abc83419..7e168f6c 100644
--- a/src/routes/auth/__init__.py
+++ b/src/routes/auth/__init__.py
@@ -10,17 +10,48 @@
 
 # Endpoints for UI Login and Logout
 
-
 # Redirect users from react app login page to Globus auth login widget then redirect back
 @auth_blueprint.route('/login')
 def login():
+    return _login(current_app.config['GLOBUS_CLIENT_APP_URI'])
+
+@auth_blueprint.route('/data-ingest-board-login')
+def data_ingest_login():
+    return _login(redirect_uri=current_app.config['DATA_INGEST_BOARD_APP_URI'], key='ingest_board_tokens')
+
+
+@auth_blueprint.route('/logout')
+def logout():
+    return _logout(redirect_uri=current_app.config['GLOBUS_CLIENT_APP_URI'])
+
+
+# @auth_blueprint.route('/data-ingest-board-logout')
+# def data_ingest_logout():
+#     return _login(redirect_uri=current_app.config['DATA_INGEST_BOARD_APP_URI'], key='ingest_board_tokens')
+
+
+def get_user_info(token):
+    auth_client = AuthClient(authorizer=AccessTokenAuthorizer(token))
+    return auth_client.oauth2_userinfo()
+
+
+def get_auth_header_dict(token) -> dict:
+    return {'Authorization': 'Bearer ' + token, 'X-SenNet-Application': 'ingest-api'}
+
+
+def get_auth_header() -> dict:
+    auth_helper_instance = AuthHelper.instance()
+    token = auth_helper_instance.getAuthorizationTokens(request.headers)
+    return get_auth_header_dict(token)
+
+def _login(redirect_uri, key = 'tokens'):
     #redirect_uri = url_for('login', _external=True)
-    redirect_uri = current_app.config['FLASK_APP_BASE_URI'] + 'login'
+    _redirect_uri = current_app.config['FLASK_APP_BASE_URI'] + request.path.replace('/', '')
 
-    confidential_app_auth_client =\
+    confidential_app_auth_client = \
         ConfidentialAppAuthClient(current_app.config['APP_CLIENT_ID'],
                                   current_app.config['APP_CLIENT_SECRET'])
-    confidential_app_auth_client.oauth2_start_flow(redirect_uri, refresh_tokens=True)
+    confidential_app_auth_client.oauth2_start_flow(_redirect_uri, refresh_tokens=True)
 
     # If there's no "code" query string parameter, we're in this route
     # starting a Globus Auth login flow.
@@ -57,31 +88,31 @@ def login():
     json_str = json.dumps(info)
 
     # Store the resulting tokens in server session
-    session.update(
-        tokens=token_response.by_resource_server
-    )
+    # session.update(
+    #     tokens=token_response.by_resource_server
+    # )
+    session[key] = token_response.by_resource_server
 
     logger.info(f"Logged in User: {user_info['name']}")
 
     # Finally redirect back to the client
-    return redirect(current_app.config['GLOBUS_CLIENT_APP_URI'] + '?info=' + str(json_str))
+    return redirect(redirect_uri + '?info=' + str(json_str))
 
 
-@auth_blueprint.route('/logout')
-def logout():
+def _logout(redirect_uri, key='tokens'):
     """
     - Revoke the tokens with Globus Auth.
     - Destroy the session state.
    - Redirect the user to the Globus Auth logout page.
     """
-    confidential_app_auth_client =\
+    confidential_app_auth_client = \
         ConfidentialAppAuthClient(current_app.config['APP_CLIENT_ID'],
                                   current_app.config['APP_CLIENT_SECRET'])
 
     # Revoke the tokens with Globus Auth
-    if 'tokens' in session:
+    if key in session:
         for token in (token_info['access_token']
-                      for token_info in session['tokens'].values()):
-            confidential_app_auth_client.oauth2_revoke_token(token)
+                      for token_info in session[key].values()):
+            confidential_app_auth_client.oauth2_revoke_token(token)
 
     # Destroy the session state
     session.clear()
@@ -89,25 +120,10 @@ def logout():
     # build the logout URI with query params
     # there is no tool to help build this (yet!)
     globus_logout_url = (
-        'https://auth.globus.org/v2/web/logout' +
-        '?client={}'.format(current_app.config['APP_CLIENT_ID']) +
-        '&redirect_uri={}'.format(current_app.config['GLOBUS_CLIENT_APP_URI']) +
-        '&redirect_name={}'.format(current_app.config['GLOBUS_CLIENT_APP_NAME']))
+            'https://auth.globus.org/v2/web/logout' +
+            '?client={}'.format(current_app.config['APP_CLIENT_ID']) +
+            '&redirect_uri={}'.format(redirect_uri) +
+            '&redirect_name={}'.format(current_app.config['GLOBUS_CLIENT_APP_NAME']))
 
     # Redirect the user to the Globus Auth logout page
-    return redirect(globus_logout_url)
-
-
-def get_user_info(token):
-    auth_client = AuthClient(authorizer=AccessTokenAuthorizer(token))
-    return auth_client.oauth2_userinfo()
-
-
-def get_auth_header_dict(token) -> dict:
-    return {'Authorization': 'Bearer ' + token, 'X-SenNet-Application': 'ingest-api'}
-
-
-def get_auth_header() -> dict:
-    auth_helper_instance = AuthHelper.instance()
-    token = auth_helper_instance.getAuthorizationTokens(request.headers)
-    return get_auth_header_dict(token)
+    return redirect(globus_logout_url)
\ No newline at end of file
diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index 4b777792..d4015d77 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -532,7 +532,7 @@ def run_query(query, results, i):
 @entity_CRUD_blueprint.route('/datasets/data-status', methods=['GET'])
 def dataset_data_status():
     assay_types_dict = Ontology.assay_types(prop_callback=None, as_data_dict=True)
-    organ_types_dict = Ontology.organ_types(as_data_dict=True, prop_callback=None, data_as_val=True)
+    organ_types_dict = current_app.ubkg.get_ubkg_by_endpoint(current_app.ubkg.organ_types)
     all_datasets_query = (
         "MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(ancestor) "
         "RETURN ds.uuid AS uuid, ds.group_name AS group_name, ds.data_types AS data_types, "

From a7ed3a4f9dd20589ede18e79b9c8f2635d1873ca Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Thu, 3 Aug 2023 12:10:32 -0400
Subject: [PATCH 04/14] Add DATA_INGEST_BOARD_NAME

---
 src/instance/app.cfg.example | 1 +
 src/routes/auth/__init__.py | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example
index e8f86e1f..f30a2b68 100644
--- a/src/instance/app.cfg.example
+++ b/src/instance/app.cfg.example
@@ -21,6 +21,7 @@ GLOBUS_CLIENT_APP_NAME = 'SenNet Data Portal'
 # Point to remote URL for testing and production deployment
 GLOBUS_CLIENT_APP_URI = 'https://data.dev.sennetconsortium.org/'
 DATA_INGEST_BOARD_APP_URI = 'http://localhost:3001/'
+DATA_INGEST_BOARD_NAME = 'Data Ingest Board - DEV'
 
 GLOBUS_BASE_FILE_USER_NAME = 'hive'
 GLOBUS_ADMIN_FILE_USER_NAME = 'shirey'
diff --git a/src/routes/auth/__init__.py b/src/routes/auth/__init__.py
index 7e168f6c..2a3cd6cc 100644
--- a/src/routes/auth/__init__.py
+++ b/src/routes/auth/__init__.py
@@ -22,12 +22,12 @@ def data_ingest_login():
 
 @auth_blueprint.route('/logout')
 def logout():
-    return _logout(redirect_uri=current_app.config['GLOBUS_CLIENT_APP_URI'])
+    return _logout(redirect_uri=current_app.config['GLOBUS_CLIENT_APP_URI'], app_name=current_app.config['GLOBUS_CLIENT_APP_NAME'])
 
 
 # @auth_blueprint.route('/data-ingest-board-logout')
 # def data_ingest_logout():
-#     return _login(redirect_uri=current_app.config['DATA_INGEST_BOARD_APP_URI'], key='ingest_board_tokens')
+#     return _login(redirect_uri=current_app.config['DATA_INGEST_BOARD_APP_URI'], app_name=current_app.config['DATA_INGEST_BOARD_NAME'], key='ingest_board_tokens')
 
 
 def get_user_info(token):
@@ -98,7 +98,7 @@ def _login(redirect_uri, key = 'tokens'):
     return redirect(redirect_uri + '?info=' + str(json_str))
 
 
-def _logout(redirect_uri, key='tokens'):
+def _logout(redirect_uri, app_name, key='tokens'):
     """
     - Revoke the tokens with Globus Auth.
     - Destroy the session state.
@@ -123,7 +123,7 @@ def _logout(redirect_uri, key='tokens'):
             'https://auth.globus.org/v2/web/logout' +
             '?client={}'.format(current_app.config['APP_CLIENT_ID']) +
             '&redirect_uri={}'.format(redirect_uri) +
-            '&redirect_name={}'.format(current_app.config['GLOBUS_CLIENT_APP_NAME']))
+            '&redirect_name={}'.format(app_name))
 
     # Redirect the user to the Globus Auth logout page
     return redirect(globus_logout_url)
\ No newline at end of file

From f061aa3727f689a1c8e10c2fc767092476649c22 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Fri, 4 Aug 2023 13:46:51 -0400
Subject: [PATCH 05/14] Update Ontology

---
 src/lib/ontology.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/lib/ontology.py b/src/lib/ontology.py
index 93058a32..6324a322 100644
--- a/src/lib/ontology.py
+++ b/src/lib/ontology.py
@@ -48,7 +48,8 @@ def specimen_categories(in_enum: bool = False, as_data_dict: bool = False):
 def organ_types(in_enum: bool = False, as_data_dict: bool = False,
                 prop_callback=to_snake_case_upper, data_as_val=False):
     return _build_enum_class('OrganTypes', current_app.ubkg.organ_types, key='rui_code', val_key='term',
-                             obj_type=_get_obj_type(in_enum, as_data_dict))
+                             obj_type=_get_obj_type(in_enum, as_data_dict),
+                             prop_callback=prop_callback, data_as_val=data_as_val)
 
 
 def assay_types(in_enum: bool = False, as_data_dict: bool = False,
@@ -110,7 +111,8 @@ def specimen_categories(as_arr: bool = False, cb=str, as_data_dict: bool = False
         return Ontology._as_list_or_class(specimen_categories(as_arr, as_data_dict), as_arr, cb)
 
     @staticmethod
-    def organ_types(as_arr: bool = False, cb=str, as_data_dict: bool = False, prop_callback=to_snake_case_upper, data_as_val=False):
+    def organ_types(as_arr: bool = False, cb=str, as_data_dict: bool = False, prop_callback=to_snake_case_upper,
+                    data_as_val=False):
         return Ontology._as_list_or_class(organ_types(as_arr, as_data_dict, prop_callback,
                                                       data_as_val=data_as_val), as_arr, cb)
 

From bfe8da63fe384d25c8c45760e51d9d677a69314e Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Mon, 7 Aug 2023 09:12:47 -0400
Subject: [PATCH 06/14] Uncomment logout method, resolve query

---
 src/routes/entity_CRUD/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index d4015d77..f2b3834b 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -553,7 +553,7 @@ def dataset_data_status():
         "WHERE (ds)<-[:ACTIVITY_OUTPUT]-(:Activity) "
         "RETURN DISTINCT ds.uuid AS uuid, "
         "COLLECT(DISTINCT dn.sennet_id) AS donor_sennet_id, COLLECT(DISTINCT dn.submission_id) AS donor_submission_id, "
-        "COLLECT(DISTINCT dn.lab_donor_id) AS donor_lab_id, COALESCE(dn.metadata IS NOT NULL) AS has_metadata"
+        "COLLECT(DISTINCT dn.lab_source_id) AS donor_lab_id, COALESCE(dn.metadata IS NOT NULL) AS has_metadata"
     )
 
     descendant_datasets_query = (

From 49cb48cf029a5b1976d9c3ee686d67a9662557d1 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Mon, 7 Aug 2023 10:29:44 -0400
Subject: [PATCH 07/14] Uncomment logout - #116

---
 src/routes/auth/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/routes/auth/__init__.py b/src/routes/auth/__init__.py
index 2a3cd6cc..dac17976 100644
--- a/src/routes/auth/__init__.py
+++ b/src/routes/auth/__init__.py
@@ -25,9 +25,9 @@ def logout():
     return _logout(redirect_uri=current_app.config['GLOBUS_CLIENT_APP_URI'], app_name=current_app.config['GLOBUS_CLIENT_APP_NAME'])
 
 
-# @auth_blueprint.route('/data-ingest-board-logout')
-# def data_ingest_logout():
-#     return _login(redirect_uri=current_app.config['DATA_INGEST_BOARD_APP_URI'], app_name=current_app.config['DATA_INGEST_BOARD_NAME'], key='ingest_board_tokens')
+@auth_blueprint.route('/data-ingest-board-logout')
+def data_ingest_logout():
+    return _logout(redirect_uri=current_app.config['DATA_INGEST_BOARD_APP_URI'], app_name=current_app.config['DATA_INGEST_BOARD_NAME'], key='ingest_board_tokens')
 
 
 def get_user_info(token):

From aaca919bd8ebbee448613d503141bab11589e44f Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 8 Aug 2023 10:01:58 -0400
Subject: [PATCH 08/14] Updating queries for and renaming donor to source in
 publish endpoint

---
 src/routes/entity_CRUD/__init__.py | 117 +++++++++--------------------
 1 file changed, 34 insertions(+), 83 deletions(-)

diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index f2b3834b..be7e003c 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -534,7 +534,7 @@ def dataset_data_status():
     assay_types_dict = Ontology.assay_types(prop_callback=None, as_data_dict=True)
     organ_types_dict = current_app.ubkg.get_ubkg_by_endpoint(current_app.ubkg.organ_types)
     all_datasets_query = (
-        "MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(ancestor) "
+        "MATCH (ds:Dataset)-[:WAS_GENERATED_BY]->(:Activity)-[:USED]->(ancestor) "
         "RETURN ds.uuid AS uuid, ds.group_name AS group_name, ds.data_types AS data_types, "
         "ds.sennet_id AS sennet_id, ds.lab_dataset_id AS provider_experiment_id, ds.status AS status, "
         "ds.last_modified_timestamp AS last_touch, ds.data_access_level AS data_access_level, "
@@ -543,45 +543,39 @@ def dataset_data_status():
     )
 
     organ_query = (
-        "MATCH (ds:Dataset)<-[*]-(o:Sample {sample_category: 'organ'}) "
-        "WHERE (ds)<-[:ACTIVITY_OUTPUT]-(:Activity) "
+        "MATCH (ds:Dataset)-[*]->(o:Sample {sample_category: 'Organ'}) "
+        "WHERE (ds)-[:WAS_GENERATED_BY]->(:Activity) "
         "RETURN DISTINCT ds.uuid AS uuid, o.organ AS organ, o.sennet_id as organ_sennet_id, o.uuid as organ_uuid "
     )
 
-    donor_query = (
-        "MATCH (ds:Dataset)<-[*]-(dn:Donor) "
-        "WHERE (ds)<-[:ACTIVITY_OUTPUT]-(:Activity) "
+    source_query = (
+        "MATCH (ds:Dataset)-[*]->(dn:Source) "
+        "WHERE (ds)-[:WAS_GENERATED_BY]->(:Activity) "
         "RETURN DISTINCT ds.uuid AS uuid, "
-        "COLLECT(DISTINCT dn.sennet_id) AS donor_sennet_id, COLLECT(DISTINCT dn.submission_id) AS donor_submission_id, "
-        "COLLECT(DISTINCT dn.lab_source_id) AS donor_lab_id, COALESCE(dn.metadata IS NOT NULL) AS has_metadata"
+        "COLLECT(DISTINCT dn.sennet_id) AS source_sennet_id, "
+        "COLLECT(DISTINCT dn.lab_source_id) AS source_lab_id, COALESCE(dn.metadata IS NOT NULL) AS has_metadata"
     )
 
     descendant_datasets_query = (
-        "MATCH (dds:Dataset)<-[*]-(ds:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(:Sample) "
+        "MATCH (dds:Dataset)-[*]->(ds:Dataset)-[:WAS_GENERATED_BY]->(:Activity)-[:USED]->(:Sample) "
         "RETURN DISTINCT ds.uuid AS uuid, COLLECT(DISTINCT dds.sennet_id) AS descendant_datasets"
    )
 
-    upload_query = (
-        "MATCH (u:Upload)<-[:IN_UPLOAD]-(ds) "
-        "RETURN DISTINCT ds.uuid AS uuid, COLLECT(DISTINCT u.sennet_id) AS upload"
-    )
-
     has_rui_query = (
         "MATCH (ds:Dataset) "
-        "WHERE (ds)<-[:ACTIVITY_OUTPUT]-(:Activity) "
-        "WITH ds, [(ds)<-[*]-(s:Sample) | s.rui_location] AS rui_locations "
+        "WHERE (ds)-[:WAS_GENERATED_BY]->(:Activity) "
+        "WITH ds, [(ds)-[*]->(s:Sample) | s.rui_location] AS rui_locations "
         "RETURN ds.uuid AS uuid, any(rui_location IN rui_locations WHERE rui_location IS NOT NULL) AS has_rui_info"
     )
 
     displayed_fields = [
         "sennet_id", "group_name", "status", "organ", "provider_experiment_id", "last_touch", "has_contacts",
-        "has_contributors", "data_types", "donor_sennet_id", "donor_submission_id", "donor_lab_id",
+        "has_contributors", "data_types", "source_sennet_id", "source_lab_id",
         "has_metadata", "descendant_datasets", "upload", "has_rui_info", "globus_url", "portal_url", "ingest_url",
         "has_data", "organ_sennet_id"
     ]
 
-    queries = [all_datasets_query, organ_query, donor_query, descendant_datasets_query,
-               upload_query, has_rui_query]
+    queries = [all_datasets_query, organ_query, source_query, descendant_datasets_query, has_rui_query]
     results = [None] * len(queries)
     threads = []
     for i, query in enumerate(queries):
@@ -594,7 +588,7 @@ def dataset_data_status():
     # Here we specifically indexed the values in 'results' in case certain threads completed out of order
     all_datasets_result = results[0]
     organ_result = results[1]
-    donor_result = results[2]
+    source_result = results[2]
     descendant_datasets_result = results[3]
     upload_result = results[4]
     has_rui_result = results[5]
@@ -606,11 +600,11 @@ def dataset_data_status():
             output_dict[dataset['uuid']]['organ'] = dataset['organ']
             output_dict[dataset['uuid']]['organ_sennet_id'] = dataset['organ_sennet_id']
             output_dict[dataset['uuid']]['organ_uuid'] = dataset['organ_uuid']
-    for dataset in donor_result:
+    for dataset in source_result:
         if output_dict.get(dataset['uuid']):
-            output_dict[dataset['uuid']]['donor_sennet_id'] = dataset['donor_sennet_id']
-            output_dict[dataset['uuid']]['donor_submission_id'] = dataset['donor_submission_id']
-            output_dict[dataset['uuid']]['donor_lab_id'] = dataset['donor_lab_id']
+            output_dict[dataset['uuid']]['source_sennet_id'] = dataset['source_sennet_id']
+            output_dict[dataset['uuid']]['source_submission_id'] = dataset['source_submission_id']
+            output_dict[dataset['uuid']]['source_lab_id'] = dataset['source_lab_id']
             output_dict[dataset['uuid']]['has_metadata'] = dataset['has_metadata']
     for dataset in descendant_datasets_result:
         if output_dict.get(dataset['uuid']):
@@ -672,49 +666,6 @@ def dataset_data_status():
     return jsonify(combined_results)
 
 
-"""
-Description
-"""
-@entity_CRUD_blueprint.route('/uploads/data-status', methods=['GET'])
-def upload_data_status():
-    all_uploads_query = (
-        "MATCH (up:Upload) "
-        "OPTIONAL MATCH (up)<-[:IN_UPLOAD]-(ds:Dataset) "
-        "RETURN up.uuid AS uuid, up.group_name AS group_name, up.sennet_id AS sennet_id, up.status AS status, "
-        "up.title AS title, COLLECT(DISTINCT ds.uuid) AS datasets "
-    )
-
-    displayed_fields = [
-        "uuid", "group_name", "sennet_id", "status", "title", "datasets"
-    ]
-
-    with current_app.neo4j_driver_instance.session() as session:
-        results = session.run(all_uploads_query).data()
-        for upload in results:
-            globus_url = get_globus_url('protected', upload.get('group_name'), upload.get('uuid'))
-            upload['globus_url'] = globus_url
-            ingest_url = commons_file_helper.ensureTrailingSlashURL(current_app.config['INGEST_URL']) + 'upload' + '/' + upload[
-                'uuid']
-            upload['ingest_url'] = ingest_url
-            for prop in upload:
-                if isinstance(upload[prop], list):
-                    upload[prop] = ", ".join(upload[prop])
-                if isinstance(upload[prop], (bool, int)):
-                    upload[prop] = str(upload[prop])
-                if upload[prop] and upload[prop][0] == "[" and upload[prop][-1] == "]":
-                    upload[prop] = upload[prop].replace("'",'"')
-                    upload[prop] = json.loads(upload[prop])
-                    upload[prop] = upload[prop][0]
-                if upload[prop] is None:
-                    upload[prop] = " "
-            for field in displayed_fields:
-                if upload.get(field) is None:
-                    upload[field] = " "
-    # TODO: Once url parameters are implemented in the front-end for the data-status dashboard, we'll need to return a
-    # TODO: link to the datasets page only displaying datasets belonging to a given upload.
-    return jsonify(results)
-
-
 @entity_CRUD_blueprint.route('/datasets/<identifier>/publish', methods = ['PUT'])
 def publish_datastage(identifier):
     try:
@@ -747,7 +698,7 @@ def publish_datastage(identifier):
         if suspend_indexing_and_acls:
             no_indexing_and_acls = True
 
-        donors_to_reindex = []
+        sources_to_reindex = []
         with current_app.neo4j_driver_instance.session() as neo_session:
             #recds = session.run("Match () Return 1 Limit 1")
             #for recd in recds:
@@ -758,11 +709,11 @@ def publish_datastage(identifier):
 
             #look at all of the ancestors
             #gather uuids of ancestors that need to be switched to public access_level
-            #grab the id of the donor ancestor to use for reindexing
+            #grab the id of the source ancestor to use for reindexing
             q = f"MATCH (dataset:Dataset {{uuid: '{dataset_uuid}'}})-[:WAS_GENERATED_BY]->(e1)-[:USED|WAS_GENERATED_BY*]->(all_ancestors:Entity) RETURN distinct all_ancestors.uuid as uuid, all_ancestors.entity_type as entity_type, all_ancestors.data_types as data_types, all_ancestors.data_access_level as data_access_level, all_ancestors.status as status, all_ancestors.metadata as metadata"
             rval = neo_session.run(q).data()
             uuids_for_public = []
-            has_donor = False
+            has_source = False
             for node in rval:
                 uuid = node['uuid']
                 entity_type = node['entity_type']
@@ -773,10 +724,10 @@ def publish_datastage(identifier):
                     if data_access_level != 'public':
                         uuids_for_public.append(uuid)
                 elif entity_type == 'Source':
-                    has_donor = True
+                    has_source = True
                     if is_primary:
                         if metadata is None or metadata.strip() == '':
-                            return jsonify({"error": f"donor.metadata is missing for {dataset_uuid}"}), 400
+                            return jsonify({"error": f"source.metadata is missing for {dataset_uuid}"}), 400
                         metadata = metadata.replace("'", '"')
                         metadata_dict = json.loads(metadata)
                         living_donor = True
@@ -786,18 +737,18 @@ def publish_datastage(identifier):
                         if metadata_dict.get('living_donor_data') is None:
                             organ_donor = False
                         if (organ_donor and living_donor) or (not organ_donor and not living_donor):
-                            return jsonify({"error": f"donor.metadata.organ_donor_data or "
-                                                     f"donor.metadata.living_donor_data required. "
+                            return jsonify({"error": f"source.metadata.organ_donor_data or "
+                                                     f"source.metadata.living_donor_data required. "
                                                      f"Both cannot be None. Both cannot be present. Only one."}), 400
-                    donors_to_reindex.append(uuid)
+                    sources_to_reindex.append(uuid)
                     if data_access_level != 'public':
                         uuids_for_public.append(uuid)
                 elif entity_type == 'Dataset':
                     if status != 'Published':
                         return Response(f"{dataset_uuid} has an ancestor dataset that has not been Published. Will not Publish. Ancestor dataset is: {uuid}", 400)
 
-            if has_donor is False:
-                return Response(f"{dataset_uuid}: no donor found for dataset, will not Publish")
+            if has_source is False:
+                return Response(f"{dataset_uuid}: no source found for dataset, will not Publish")
 
             #get info for the dataset to be published
             q = f"MATCH (e:Dataset {{uuid: '{dataset_uuid}'}}) RETURN e.uuid as uuid, e.entity_type as entitytype, e.status as status, e.data_access_level as data_access_level, e.group_uuid as group_uuid, e.contacts as contacts, e.contributors as contributors"
@@ -894,17 +845,17 @@ def publish_datastage(identifier):
         # out = entity_instance.clear_cache(e_id)
 
         if no_indexing_and_acls:
-            r_val = {'acl_cmd': acls_cmd, 'donors_for_indexing': donors_to_reindex}
+            r_val = {'sources_for_indexing': sources_to_reindex}
         else:
-            r_val = {'acl_cmd': '', 'donors_for_indexing': []}
+            r_val = {'acl_cmd': '', 'sources_for_indexing': []}
 
         if not no_indexing_and_acls:
-            for donor_uuid in donors_to_reindex:
+            for source_uuid in sources_to_reindex:
                 try:
-                    rspn = requests.put(current_app.config['SEARCH_WEBSERVICE_URL'] + "/reindex/" + donor_uuid, headers={'Authorization': request.headers["AUTHORIZATION"]})
-                    logger.info(f"Publishing {identifier} indexed donor {donor_uuid} with status {rspn.status_code}")
+                    rspn = requests.put(current_app.config['SEARCH_WEBSERVICE_URL'] + "/reindex/" + source_uuid, headers={'Authorization': request.headers["AUTHORIZATION"]})
+                    logger.info(f"Publishing {identifier} indexed source {source_uuid} with status {rspn.status_code}")
                 except:
-                    logger.exception(f"While publishing {identifier} Error happened when calling reindex web service for donor {donor_uuid}")
+                    logger.exception(f"While publishing {identifier} Error happened when calling reindex web service for source {source_uuid}")
 
     return Response(json.dumps(r_val), 200, mimetype='application/json')

From 780623fedb3502ca623db0b0b5edf90fb22880c6 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Tue, 15 Aug 2023 10:48:53 -0400
Subject: [PATCH 09/14] Update Ontology call

---
 src/routes/entity_CRUD/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index 0550971d..f0a7dddf 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -533,7 +533,7 @@ def run_query(query, results, i):
 """
 @entity_CRUD_blueprint.route('/datasets/data-status', methods=['GET'])
 def dataset_data_status():
-    assay_types_dict = Ontology.assay_types(prop_callback=None, as_data_dict=True)
+    assay_types_dict = Ontology.ops(prop_callback=None, as_data_dict=True).assay_types()
     organ_types_dict = current_app.ubkg.get_ubkg_by_endpoint(current_app.ubkg.organ_types)
     all_datasets_query = (
         "MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(ancestor) "

From 5d1a46b20a9d0ca33db2ddb163b00291c04e1a4b Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 15 Aug 2023 10:59:40 -0400
Subject: [PATCH 10/14] Removing unused references to upload_query

---
 src/routes/entity_CRUD/__init__.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index be7e003c..76251e0d 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -590,8 +590,7 @@ def dataset_data_status():
     organ_result = results[1]
     source_result = results[2]
     descendant_datasets_result = results[3]
-    upload_result = results[4]
-    has_rui_result = results[5]
+    has_rui_result = results[4]
 
     for dataset in all_datasets_result:
         output_dict[dataset['uuid']] = dataset
@@ -609,9 +608,6 @@ def dataset_data_status():
     for dataset in descendant_datasets_result:
         if output_dict.get(dataset['uuid']):
             output_dict[dataset['uuid']]['descendant_datasets'] = dataset['descendant_datasets']
-    for dataset in upload_result:
-        if output_dict.get(dataset['uuid']):
-            output_dict[dataset['uuid']]['upload'] = dataset['upload']
     for dataset in has_rui_result:
         if output_dict.get(dataset['uuid']):
             output_dict[dataset['uuid']]['has_rui_info'] = dataset['has_rui_info']

From b0f2abd9cb9b13eb1bcd58e1560c6b01c8135f36 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Tue, 15 Aug 2023 11:26:54 -0400
Subject: [PATCH 11/14] Make neo4j work for now ...

---
 src/app.py | 3 ++-
 src/lib/neo4j_helper.py | 26 ++++++++++++++++++++++++++
 src/routes/entity_CRUD/__init__.py | 7 ++++---
 3 files changed, 32 insertions(+), 4 deletions(-)
 create mode 100644 src/lib/neo4j_helper.py

diff --git a/src/app.py b/src/app.py
index e6cc4af0..2cef9da3 100644
--- a/src/app.py
+++ b/src/app.py
@@ -22,6 +22,7 @@
 
 # Local Modules
 from lib.file_upload_helper import UploadFileHelper
+from lib.neo4j_helper import Neo4jHelper
 
 # Set logging format and level (default is warning)
 # All the API logging is forwarded to the uWSGI server and gets written into the log file `uwsgi-ingest-api.log`
@@ -95,7 +96,7 @@
                                    app.config['NEO4J_USERNAME'],
                                    app.config['NEO4J_PASSWORD'])
 
-    app.neo4j_driver_instance = neo4j_driver_instance
+    Neo4jHelper.set_instance(neo4j_driver_instance)
 
     logger.info("Initialized neo4j_driver module successfully :)")
 except Exception:
diff --git a/src/lib/neo4j_helper.py b/src/lib/neo4j_helper.py
new file mode 100644
index 00000000..a706d476
--- /dev/null
+++ b/src/lib/neo4j_helper.py
@@ -0,0 +1,26 @@
+from neo4j import Driver
+
+neo4j_driver_instance = None
+
+
+class Neo4jHelper:
+    @staticmethod
+    def set_instance(neo4j):
+        global neo4j_driver_instance
+        neo4j_driver_instance = neo4j
+
+    @staticmethod
+    def get_instance():
+        global neo4j_driver_instance
+        return neo4j_driver_instance
+
+    @staticmethod
+    def close():
+        # Specify as module-scope variable
+        global neo4j_driver_instance
+
+        if isinstance(neo4j_driver_instance, Driver):
+            neo4j_driver_instance.close()
+            neo4j_driver_instance = None
+        else:
+            raise TypeError("The private module variable '_driver' is not a neo4j.Driver object")
\ No newline at end of file
diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index f6df1141..bca11230 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -23,6 +23,7 @@
 from lib.file_upload_helper import UploadFileHelper
 from lib import get_globus_url
 from lib.datacite_doi_helper import DataCiteDoiHelper
+from lib.neo4j_helper import Neo4jHelper
 
 entity_CRUD_blueprint = Blueprint('entity_CRUD', __name__)
 
@@ -525,7 +526,7 @@ def update_ingest_status():
 
 def run_query(query, results, i):
     logger.info(query)
-    with current_app.neo4j_driver_instance.session() as session:
+    with Neo4jHelper.get_instance().session() as session:
         results[i] = session.run(query).data()
 
 """
@@ -697,7 +698,7 @@ def publish_datastage(identifier):
             no_indexing_and_acls = True
 
         sources_to_reindex = []
-        with current_app.neo4j_driver_instance.session() as neo_session:
+        with Neo4jHelper.get_instance().session() as neo_session:
             #recds = session.run("Match () Return 1 Limit 1")
             #for recd in recds:
             #    if recd[0] == 1:
@@ -867,7 +868,7 @@ def publish_datastage(identifier):
 
 
 def dataset_is_primary(dataset_uuid):
-    with current_app.neo4j_driver_instance.session() as neo_session:
+    with Neo4jHelper.get_instance().session() as neo_session:
         q = (f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})-[:WAS_GENERATED_BY]->(:Activity)-[:USED]->(s:Sample) RETURN ds.uuid")
         result = neo_session.run(q).data()
         if len(result) == 0:

From 095ec9edd8c1d5e51931b841c9075e81f9ad90e4 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Tue, 15 Aug 2023 11:49:49 -0400
Subject: [PATCH 12/14] Add missing config values

---
 src/instance/app.cfg.example | 9 +++++++++
 src/lib/__init__.py | 6 +++---
 src/routes/entity_CRUD/__init__.py | 4 ++--
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example
index f30a2b68..5285b8d0 100644
--- a/src/instance/app.cfg.example
+++ b/src/instance/app.cfg.example
@@ -89,3 +89,12 @@ UBKG_SERVER = 'https://ontology.api.hubmapconsortium.org/'
 UBKG_ENDPOINT_VALUESET = 'valueset?parent_sab=SENNET&parent_code={code}&child_sabs=SENNET'
 UBKG_CODES = '{"specimen_categories":"C020076", "organ_types":{"code": "C000008", "key": "organs", "endpoint": "organs?application_context=SENNET"}, "entities": "C000012", "source_types":"C050020", "assay_types":{"code": "C004000", "key": "datasets", "endpoint": "datasets?application_context=SENNET"}}'
 
+GLOBUS_PUBLIC_ENDPOINT_UUID = 'a00a00b8-d1d1-4ab9-b915-0bada54c27a3'
+GLOBUS_CONSORTIUM_ENDPOINT_UUID = '3cb7d673-a3db-40e9-8376-f2ead6cb5a45'
+GLOBUS_PROTECTED_ENDPOINT_UUID = 'bdaf8547-aab3-4142-97bd-0a16d5cd9f58'
+
+# The base url for the HuBMAP portal website
+PORTAL_URL = 'https://data.dev.sennetconsortium.org/'
+
+# The base url for the HuBMAP ingest website
+INGEST_URL = 'https://data.dev.sennetconsortium.org/'
\ No newline at end of file
diff --git a/src/lib/__init__.py b/src/lib/__init__.py
index 7a1f560b..25d98242 100644
--- a/src/lib/__init__.py
+++ b/src/lib/__init__.py
@@ -8,17 +8,17 @@ def get_globus_url(data_access_level, group_name, uuid):
     # public access
     if data_access_level == "public":
         globus_server_uuid = current_app.config['GLOBUS_PUBLIC_ENDPOINT_UUID']
-        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['PUBLIC_DATA_SUBDIR'])
+        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['ACCESS_LEVEL_PUBLIC'])
         dir_path = dir_path + access_dir + "/"
     # consortium access
     elif data_access_level == 'consortium':
         globus_server_uuid = current_app.config['GLOBUS_CONSORTIUM_ENDPOINT_UUID']
-        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['CONSORTIUM_DATA_SUBDIR'])
+        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['ACCESS_LEVEL_CONSORTIUM'])
         dir_path = dir_path + access_dir + group_name + "/"
     # protected access
     elif data_access_level == 'protected':
         globus_server_uuid = current_app.config['GLOBUS_PROTECTED_ENDPOINT_UUID']
-        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['PROTECTED_DATA_SUBDIR'])
+        access_dir = commons_file_helper.ensureTrailingSlashURL(current_app.config['ACCESS_LEVEL_PROTECTED'])
         dir_path = dir_path + access_dir + group_name + "/"
 
     if globus_server_uuid is not None:
diff --git a/src/routes/entity_CRUD/__init__.py b/src/routes/entity_CRUD/__init__.py
index bca11230..b30068eb 100644
--- a/src/routes/entity_CRUD/__init__.py
+++ b/src/routes/entity_CRUD/__init__.py
@@ -534,7 +534,7 @@ def run_query(query, results, i):
 """
 @entity_CRUD_blueprint.route('/datasets/data-status', methods=['GET'])
 def dataset_data_status():
-    assay_types_dict = Ontology.ops(prop_callback=None, as_data_dict=True).assay_types()
+    assay_types_dict = Ontology.ops(prop_callback=None, as_data_dict=True, data_as_val=True).assay_types()
     organ_types_dict = current_app.ubkg.get_ubkg_by_endpoint(current_app.ubkg.organ_types)
     all_datasets_query = (
@@ -605,7 +605,7 @@ def dataset_data_status():
     for dataset in source_result:
         if output_dict.get(dataset['uuid']):
             output_dict[dataset['uuid']]['source_sennet_id'] = dataset['source_sennet_id']
-            output_dict[dataset['uuid']]['source_submission_id'] = dataset['source_submission_id']
+            # output_dict[dataset['uuid']]['source_submission_id'] = dataset['source_submission_id']
             output_dict[dataset['uuid']]['source_lab_id'] = dataset['source_lab_id']
             output_dict[dataset['uuid']]['has_metadata'] = dataset['has_metadata']

From f6c420fbdddb8ad734f814acbadc25e9d29aee09 Mon Sep 17 00:00:00 2001
From: Lisa-Ann B
Date: Tue, 15 Aug 2023 15:31:50 -0400
Subject: [PATCH 13/14] Add GLOBUS_APP_BASE_URL

---
 src/instance/app.cfg.example | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example
index 5285b8d0..0d971aa2 100644
--- a/src/instance/app.cfg.example
+++ b/src/instance/app.cfg.example
@@ -97,4 +97,6 @@ GLOBUS_PROTECTED_ENDPOINT_UUID = 'bdaf8547-aab3-4142-97bd-0a16d5cd9f58'
 PORTAL_URL = 'https://data.dev.sennetconsortium.org/'
 
 # The base url for the HuBMAP ingest website
-INGEST_URL = 'https://data.dev.sennetconsortium.org/'
\ No newline at end of file
+INGEST_URL = 'https://data.dev.sennetconsortium.org/'
+
+GLOBUS_APP_BASE_URL = 'https://app.globus.org'
\ No newline at end of file

From f07a702459fc1b3e3d5d3c82fed7750e73f929c1 Mon Sep 17 00:00:00 2001
From: maxsibilla
Date: Tue, 15 Aug 2023 15:58:18 -0400
Subject: [PATCH 14/14] Updating app.cfg.example

---
 src/instance/app.cfg.example | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example
index 0d971aa2..cc8101b3 100644
--- a/src/instance/app.cfg.example
+++ b/src/instance/app.cfg.example
@@ -37,6 +37,14 @@ UUID_WEBSERVICE_URL = 'http://uuid-api:8080'
 #Search-api
 SEARCH_WEBSERVICE_URL = 'https://search-api.dev.sennetconsortium.org'
 
+# The base url for the HuBMAP portal website
+PORTAL_URL = 'https://data.dev.sennetconsortium.org/'
+
+# The base url for the HuBMAP ingest website
+INGEST_URL = 'https://ingest-api.dev.sennetconsortium.org/'
+
+GLOBUS_APP_BASE_URL = 'https://app.globus.org'
+
 # Directory where file uploads will be placed temporarily
 # until they are committed
 # Remember to set the proper file system user and group permission
@@ -88,15 +96,3 @@ DATACITE_SENNET_PREFIX = ''
 UBKG_SERVER = 'https://ontology.api.hubmapconsortium.org/'
 UBKG_ENDPOINT_VALUESET = 'valueset?parent_sab=SENNET&parent_code={code}&child_sabs=SENNET'
 UBKG_CODES = '{"specimen_categories":"C020076", "organ_types":{"code": "C000008", "key": "organs", "endpoint": "organs?application_context=SENNET"}, "entities": "C000012", "source_types":"C050020", "assay_types":{"code": "C004000", "key": "datasets", "endpoint": "datasets?application_context=SENNET"}}'
-
-GLOBUS_PUBLIC_ENDPOINT_UUID = 'a00a00b8-d1d1-4ab9-b915-0bada54c27a3'
-GLOBUS_CONSORTIUM_ENDPOINT_UUID = '3cb7d673-a3db-40e9-8376-f2ead6cb5a45'
-GLOBUS_PROTECTED_ENDPOINT_UUID = 'bdaf8547-aab3-4142-97bd-0a16d5cd9f58'
-
-# The base url for the HuBMAP portal website
-PORTAL_URL = 'https://data.dev.sennetconsortium.org/'
-
-# The base url for the HuBMAP ingest website
-INGEST_URL = 'https://data.dev.sennetconsortium.org/'
-
-GLOBUS_APP_BASE_URL = 'https://app.globus.org'
\ No newline at end of file
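
The /datasets/data-status endpoint introduced in PATCH 01 fans its Cypher queries out across worker threads (run_query writes each result set into a fixed slot of a shared list) and then stitches the per-query rows together keyed on uuid. The following is a minimal, self-contained sketch of that pattern using the official neo4j Python driver; the bolt://localhost:7687 URI and the credentials are placeholders, and the two queries are simplified stand-ins, so this illustrates the technique only and is not the application code.

    from threading import Thread

    from neo4j import GraphDatabase

    # Placeholder connection details; the real application reads these from app.cfg.
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

    def run_query(query, results, i):
        # Each worker opens its own session and writes into slot i, so the
        # merge below does not depend on thread completion order.
        with driver.session() as session:
            results[i] = session.run(query).data()

    queries = [
        "MATCH (ds:Dataset) RETURN ds.uuid AS uuid, ds.status AS status",
        "MATCH (ds:Dataset)-[*]->(o:Sample {sample_category: 'Organ'}) "
        "RETURN DISTINCT ds.uuid AS uuid, o.organ AS organ",
    ]
    results = [None] * len(queries)
    threads = [Thread(target=run_query, args=(q, results, i)) for i, q in enumerate(queries)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # Fold the secondary rows into the primary records keyed on uuid, mirroring
    # how the endpoint merges organ/source/descendant results per dataset.
    merged = {row["uuid"]: dict(row) for row in results[0]}
    for row in results[1]:
        if row["uuid"] in merged:
            merged[row["uuid"]]["organ"] = row["organ"]

    driver.close()

Writing into pre-sized list slots (rather than appending from each thread) is what lets the code join the threads in any order and still know which result came from which query.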