Skip to content

Commit

Permalink
Merge pull request #95 from OpenDrugDiscovery/issue_90
Browse files Browse the repository at this point in the history
Moved sanitization in cli
  • Loading branch information
prtos authored Jun 8, 2024
2 parents 032aa71 + 0566260 commit 851a447
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 48 deletions.
26 changes: 16 additions & 10 deletions openqdc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,15 @@
app = typer.Typer(help="OpenQDC CLI")


def sanitize(dictionary):
    """Return a copy of ``dictionary`` whose keys are normalized for lookup.

    Each key is lowercased and stripped of every ``_`` and ``-`` character,
    so that e.g. ``"PCQM_PM6"`` becomes ``"pcqmpm6"``. Values are kept as-is.
    If two keys normalize to the same string, the later one wins.

    Parameters
    ----------
    dictionary : dict
        Mapping with string keys.

    Returns
    -------
    dict
        New mapping from normalized key to the original value.
    """
    # One translation pass removes both separators instead of chaining
    # two ``str.replace`` calls.
    strip_separators = str.maketrans("", "", "_-")
    return {key.lower().translate(strip_separators): value for key, value in dictionary.items()}


# Dataset lookup table keyed by sanitized names (lowercase, no "_" or "-"; see
# ``sanitize``). Built once at import time so the CLI commands can index it
# directly with the normalized dataset name.
SANITIZED_AVAILABLE_DATASETS = sanitize(AVAILABLE_DATASETS)


def exist_dataset(dataset):
    """Report whether ``dataset`` is a known dataset name.

    The name is looked up in ``SANITIZED_AVAILABLE_DATASETS``, so it must
    already be in sanitized form (lowercase, without ``_`` or ``-``). An
    error is logged when the name is not found.

    Parameters
    ----------
    dataset : str
        Sanitized dataset name to check.

    Returns
    -------
    bool
        ``True`` if the dataset is available, ``False`` otherwise.
    """
    # Use the table precomputed at import time rather than re-sanitizing the
    # whole registry on every call; it is the same table the commands index.
    if dataset in SANITIZED_AVAILABLE_DATASETS:
        return True
    logger.error(f"{dataset} is not available. Please open an issue on Github for the team to look into it.")
    return False
Expand Down Expand Up @@ -57,10 +64,10 @@ def download(
"""
for dataset in list(map(lambda x: x.lower().replace("_", ""), datasets)):
if exist_dataset(dataset):
if AVAILABLE_DATASETS[dataset].no_init().is_cached() and not overwrite:
if SANITIZED_AVAILABLE_DATASETS[dataset].no_init().is_cached() and not overwrite:
logger.info(f"{dataset} is already cached. Skipping download")
else:
AVAILABLE_DATASETS[dataset](overwrite_local_cache=True, cache_dir=cache_dir)
SANITIZED_AVAILABLE_DATASETS[dataset](overwrite_local_cache=True, cache_dir=cache_dir)


@app.command()
Expand Down Expand Up @@ -115,18 +122,17 @@ def fetch(
openqdc fetch Spice
"""
if datasets[0].lower() == "all":
dataset_names = AVAILABLE_DATASETS
dataset_names = list(sanitize(AVAILABLE_DATASETS).keys())
elif datasets[0].lower() == "potential":
dataset_names = AVAILABLE_POTENTIAL_DATASETS
dataset_names = list(sanitize(AVAILABLE_POTENTIAL_DATASETS).keys())
elif datasets[0].lower() == "interaction":
dataset_names = AVAILABLE_INTERACTION_DATASETS
dataset_names = list(sanitize(AVAILABLE_INTERACTION_DATASETS).keys())
else:
dataset_names = datasets

for dataset in list(map(lambda x: x.lower().replace("_", ""), dataset_names)):
if exist_dataset(dataset):
try:
AVAILABLE_DATASETS[dataset].fetch(cache_dir, overwrite)
SANITIZED_AVAILABLE_DATASETS[dataset].fetch(cache_dir, overwrite)
except Exception as e:
logger.error(f"Something unexpected happended while fetching {dataset}: {repr(e)}")

Expand All @@ -152,9 +158,9 @@ def preprocess(
"""
for dataset in list(map(lambda x: x.lower().replace("_", ""), datasets)):
if exist_dataset(dataset):
logger.info(f"Preprocessing {AVAILABLE_DATASETS[dataset].__name__}")
logger.info(f"Preprocessing {SANITIZED_AVAILABLE_DATASETS[dataset].__name__}")
try:
AVAILABLE_DATASETS[dataset].no_init().preprocess(upload=upload, overwrite=overwrite)
SANITIZED_AVAILABLE_DATASETS[dataset].no_init().preprocess(upload=upload, overwrite=overwrite)
except Exception as e:
logger.error(f"Error while preprocessing {dataset}. {e}. Did you fetch the dataset first?")
raise e
Expand Down
16 changes: 8 additions & 8 deletions openqdc/datasets/interaction/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from .x40 import X40

# Registry of the interaction datasets: dataset name -> dataset object.
# NOTE(review): every dataset is listed here under two spellings (lowercase and
# class-style); presumably only one spelling is meant to remain — confirm.
AVAILABLE_INTERACTION_DATASETS = {
"des5m": DES5M,
"des370k": DES370K,
"dess66": DESS66,
"dess66x8": DESS66x8,
"l7": L7,
"metcalf": Metcalf,
"splinter": Splinter,
"x40": X40,
"DES5M": DES5M,
"DES370K": DES370K,
"DESS66": DESS66,
"DESS66x8": DESS66x8,
"L7": L7,
"Metcalf": Metcalf,
"Splinter": Splinter,
"X40": X40,
}
59 changes: 29 additions & 30 deletions openqdc/datasets/potential/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,34 +21,33 @@
from .waterclusters3_30 import WaterClusters

# Registry of the potential datasets: dataset name -> dataset object.
# NOTE(review): datasets are listed here under two spellings (lowercase and
# class-style); presumably only one spelling is meant to remain — confirm.
AVAILABLE_POTENTIAL_DATASETS = {
"ani1": ANI1,
"ani1ccx": ANI1CCX,
"ani1ccxv2": ANI1CCX_V2,
# NOTE(review): there is no class-style "ANI2" key further down — confirm the
# omission is intentional.
"ani2": ANI2,
"ani1x": ANI1X,
"comp6": COMP6,
"gdml": GDML,
"geom": GEOM,
"iso17": ISO17,
"molecule3d": Molecule3D,
"nabladft": NablaDFT,
"orbnetdenali": OrbnetDenali,
"pcqmb3lyp": PCQM_B3LYP,
"pcqmpm6": PCQM_PM6,
"qm7x": QM7X,
"qm7xv2": QM7X_V2,
"qmugs": QMugs,
"qmugsv2": QMugs_V2,
"sn2rxn": SN2RXN,
"solvatedpeptides": SolvatedPeptides,
"spice": Spice,
"spicev2": SpiceV2,
"spicevl2": SpiceVL2,
"tmqm": TMQM,
"transition1x": Transition1X,
"watercluster": WaterClusters,
"multixcqm9": MultixcQM9,
"multixcqm9v2": MultixcQM9_V2,
"revmd17": RevMD17,
"md22": MD22,
"ANI1": ANI1,
"ANI1CCX": ANI1CCX,
"ANI1CCX_V2": ANI1CCX_V2,
"ANI1X": ANI1X,
"COMP6": COMP6,
"GDML": GDML,
"GEOM": GEOM,
"ISO17": ISO17,
"Molecule3D": Molecule3D,
"NablaDFT": NablaDFT,
"OrbnetDenali": OrbnetDenali,
"PCQM_B3LYP": PCQM_B3LYP,
"PCQM_PM6": PCQM_PM6,
"QM7X": QM7X,
"QM7X_V2": QM7X_V2,
"QMugs": QMugs,
"QMugs_V2": QMugs_V2,
"SN2RXN": SN2RXN,
"SolvatedPeptides": SolvatedPeptides,
"Spice": Spice,
"SpiceV2": SpiceV2,
"SpiceVL2": SpiceVL2,
"TMQM": TMQM,
"Transition1X": Transition1X,
# NOTE(review): this key lowercases to "waterclusters", which differs from the
# "watercluster" key above — confirm which name callers should use.
"WaterClusters": WaterClusters,
"MultixcQM9": MultixcQM9,
"MultixcQM9_V2": MultixcQM9_V2,
"RevMD17": RevMD17,
"MD22": MD22,
}

0 comments on commit 851a447

Please sign in to comment.