From 55010a630813d6fc835cfd688dd19b1dacdb6f0c Mon Sep 17 00:00:00 2001 From: Kyle Swanson Date: Mon, 11 Dec 2023 22:00:06 -0600 Subject: [PATCH] DrugBank column fix --- admet_ai/constants.py | 5 +++++ admet_ai/web/app/drugbank.py | 19 ++++++++++++++++--- scripts/get_drugbank_approved.py | 5 +++-- scripts/tdc_constants.py | 5 ----- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/admet_ai/constants.py b/admet_ai/constants.py index 20e9e0c..1862a33 100644 --- a/admet_ai/constants.py +++ b/admet_ai/constants.py @@ -9,5 +9,10 @@ DEFAULT_MODELS_DIR = resources_dir / "models" # DrugBank columns +DRUGBANK_ID_COLUMN = "id" +DRUGBANK_NAME_COLUMN = "name" +DRUGBANK_SMILES_COLUMN = "smiles" +DRUGBANK_ATC_PREFIX = "atc" DRUGBANK_ATC_NAME_PREFIX = "atc_name" +DRUGBANK_ATC_CODE_COLUMN = DRUGBANK_ATC_PREFIX DRUGBANK_DELIMITER = ";" diff --git a/admet_ai/web/app/drugbank.py b/admet_ai/web/app/drugbank.py index c21300d..1f1224a 100644 --- a/admet_ai/web/app/drugbank.py +++ b/admet_ai/web/app/drugbank.py @@ -5,7 +5,14 @@ import matplotlib import pandas as pd -from admet_ai.constants import DRUGBANK_ATC_NAME_PREFIX, DRUGBANK_DELIMITER +from admet_ai.constants import ( + DRUGBANK_ATC_NAME_PREFIX, + DRUGBANK_ATC_PREFIX, + DRUGBANK_DELIMITER, + DRUGBANK_ID_COLUMN, + DRUGBANK_NAME_COLUMN, + DRUGBANK_SMILES_COLUMN, +) from admet_ai.web.app import app from admet_ai.web.app.admet_info import get_admet_id_to_name @@ -95,8 +102,14 @@ def get_drugbank_tasks_ids() -> list[str]: :return: A list of tasks (properties) predicted in the DrugBank reference set. """ - non_task_columns = ["name", "smiles"] + [ - column for column in DRUGBANK_DF.columns if column.startswith("atc_") + non_task_columns = [ + DRUGBANK_ID_COLUMN, + DRUGBANK_NAME_COLUMN, + DRUGBANK_SMILES_COLUMN, + ] + [ + column + for column in DRUGBANK_DF.columns + if column.startswith(DRUGBANK_ATC_PREFIX) ] task_columns = set(DRUGBANK_DF.columns) - set(non_task_columns) drugbank_task_ids = sorted(task_columns) diff --git a/scripts/get_drugbank_approved.py b/scripts/get_drugbank_approved.py index 12134ca..278b1cf 100644 --- a/scripts/get_drugbank_approved.py +++ b/scripts/get_drugbank_approved.py @@ -6,9 +6,10 @@ from rdkit import Chem from tqdm import tqdm -from admet_ai.constants import DRUGBANK_ATC_NAME_PREFIX, DRUGBANK_DELIMITER -from tdc_constants import ( +from admet_ai.constants import ( DRUGBANK_ATC_CODE_COLUMN, + DRUGBANK_ATC_NAME_PREFIX, + DRUGBANK_DELIMITER, DRUGBANK_ID_COLUMN, DRUGBANK_NAME_COLUMN, DRUGBANK_SMILES_COLUMN, diff --git a/scripts/tdc_constants.py b/scripts/tdc_constants.py index 18c5723..8c50e92 100644 --- a/scripts/tdc_constants.py +++ b/scripts/tdc_constants.py @@ -103,8 +103,3 @@ ADMET_ALL_SMILES_COLUMN = "smiles" ADMET_GROUP_SMILES_COLUMN = "Drug" ADMET_GROUP_TARGET_COLUMN = "Y" - -DRUGBANK_ID_COLUMN = "id" -DRUGBANK_NAME_COLUMN = "name" -DRUGBANK_SMILES_COLUMN = "smiles" -DRUGBANK_ATC_CODE_COLUMN = "atc"