-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #38 from OpenDrugDiscovery/refactoring
Refactoring
- Loading branch information
Showing
51 changed files
with
1,384 additions
and
608 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import importlib | ||
import os | ||
from typing import TYPE_CHECKING # noqa F401 | ||
|
||
# The lazy import logic below is adapted from openff-toolkit: | ||
# https://github.com/openforcefield/openff-toolkit/blob/b52879569a0344878c40248ceb3bd0f90348076a/openff/toolkit/__init__.py#L44 | ||
|
||
# Dictionary of objects to lazily import; maps the object's name to its module path | ||
|
||
# Registry of lazily imported objects: public attribute name -> dotted path of
# the module that defines it. The module is only imported on first access.
_lazy_imports_obj = {
    # Package metadata and the dataset base class.
    "__version__": "openqdc._version",
    "BaseDataset": "openqdc.datasets.base",
    # Dataset classes (mostly under ``potential``; ``DES`` lives under ``interaction``).
    "ANI1": "openqdc.datasets.potential.ani",
    "ANI1CCX": "openqdc.datasets.potential.ani",
    "ANI1X": "openqdc.datasets.potential.ani",
    "Spice": "openqdc.datasets.potential.spice",
    "GEOM": "openqdc.datasets.potential.geom",
    "QMugs": "openqdc.datasets.potential.qmugs",
    "ISO17": "openqdc.datasets.potential.iso_17",
    "COMP6": "openqdc.datasets.potential.comp6",
    "GDML": "openqdc.datasets.potential.gdml",
    "Molecule3D": "openqdc.datasets.potential.molecule3d",
    "OrbnetDenali": "openqdc.datasets.potential.orbnet_denali",
    "SN2RXN": "openqdc.datasets.potential.sn2_rxn",
    "QM7X": "openqdc.datasets.potential.qm7x",
    "DES": "openqdc.datasets.interaction.des",
    "NablaDFT": "openqdc.datasets.potential.nabladft",
    "SolvatedPeptides": "openqdc.datasets.potential.solvated_peptides",
    "WaterClusters": "openqdc.datasets.potential.waterclusters3_30",
    "TMQM": "openqdc.datasets.potential.tmqm",
    "Dummy": "openqdc.datasets.potential.dummy",
    "PCQM_B3LYP": "openqdc.datasets.potential.pcqm",
    "PCQM_PM6": "openqdc.datasets.potential.pcqm",
    "Transition1X": "openqdc.datasets.potential.transition1x",
    # Name -> class lookup table defined in the datasets sub-package.
    "AVAILABLE_DATASETS": "openqdc.datasets",
}
|
||
# Registry of lazily imported sub-modules: attribute name -> dotted module path.
_lazy_imports_mod = {
    "datasets": "openqdc.datasets",
    "utils": "openqdc.utils",
}
|
||
|
||
def __getattr__(name):
    """Lazily import objects from _lazy_imports_obj or _lazy_imports_mod.

    Python only calls this module-level hook when ``name`` cannot be found
    among the module's regular attributes.

    Args:
        name: Attribute name being looked up on this module.

    Returns:
        The object called ``name`` taken from the module registered for it in
        ``_lazy_imports_obj``, or the imported module registered for it in
        ``_lazy_imports_mod``.

    Raises:
        AttributeError: If ``name`` is not registered in either table, or if
            the registered module does not actually define ``name``.
    """
    obj_mod = _lazy_imports_obj.get(name)
    if obj_mod is not None:
        mod = importlib.import_module(obj_mod)
        try:
            return mod.__dict__[name]
        except KeyError:
            # A stale registry entry must surface as AttributeError, not
            # KeyError, so that hasattr() / getattr(..., default) keep working.
            raise AttributeError(
                f"module {__name__!r} has no attribute {name!r} (not defined in {obj_mod!r})"
            ) from None

    lazy_mod = _lazy_imports_mod.get(name)
    if lazy_mod is not None:
        return importlib.import_module(lazy_mod)

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||
|
||
def __dir__():
    """Add _lazy_imports_obj and _lazy_imports_mod to dir(<module>).

    Returns:
        A sorted list of unique attribute names: everything currently in the
        module globals plus every lazily importable object and sub-module.
    """
    # A set removes duplicates: a lazily mapped name can also be present in
    # globals() (e.g. when the eager-import branch of this module has run).
    names = {*globals(), *_lazy_imports_obj, *_lazy_imports_mod}
    return sorted(names)
|
||
|
||
if TYPE_CHECKING or os.environ.get("OPENQDC_DISABLE_LAZY_LOADING", "0") == "1": | ||
# These types are imported lazily at runtime, but we need to tell type | ||
# checkers what they are. | ||
from ._version import __version__ # noqa | ||
from .datasets import AVAILABLE_DATASETS # noqa | ||
from .datasets.base import BaseDataset # noqa | ||
from .datasets.interaction.des import DES # noqa | ||
from .datasets.potential.ani import ANI1, ANI1CCX, ANI1X # noqa | ||
from .datasets.potential.comp6 import COMP6 # noqa | ||
from .datasets.potential.dummy import Dummy # noqa | ||
from .datasets.potential.gdml import GDML # noqa | ||
from .datasets.potential.geom import GEOM # noqa | ||
from .datasets.potential.iso_17 import ISO17 # noqa | ||
from .datasets.potential.molecule3d import Molecule3D # noqa | ||
from .datasets.potential.nabladft import NablaDFT # noqa | ||
from .datasets.potential.orbnet_denali import OrbnetDenali # noqa | ||
from .datasets.potential.pcqm import PCQM_B3LYP, PCQM_PM6 # noqa | ||
from .datasets.potential.qm7x import QM7X # noqa | ||
from .datasets.potential.qmugs import QMugs # noqa | ||
from .datasets.potential.sn2_rxn import SN2RXN # noqa | ||
from .datasets.potential.solvated_peptides import SolvatedPeptides # noqa | ||
from .datasets.potential.spice import Spice # noqa | ||
from .datasets.potential.tmqm import TMQM # noqa | ||
from .datasets.potential.transition1x import Transition1X # noqa | ||
from .datasets.potential.waterclusters3_30 import WaterClusters # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
try: | ||
from importlib.metadata import PackageNotFoundError, version | ||
except ModuleNotFoundError: | ||
# Try backported to PY<38 `importlib_metadata`. | ||
from importlib_metadata import PackageNotFoundError, version | ||
|
||
# Start from the "dev" placeholder and replace it with the installed
# distribution's version when that metadata is available.
__version__ = "dev"
try:
    __version__ = version("openqdc")
except PackageNotFoundError:
    # package is not installed; keep the placeholder
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
from typing import List, Optional | ||
|
||
import typer | ||
from loguru import logger | ||
from prettytable import PrettyTable | ||
from typing_extensions import Annotated | ||
|
||
from openqdc import AVAILABLE_DATASETS | ||
from openqdc.raws.config_factory import DataConfigFactory | ||
from openqdc.raws.fetch import DataDownloader | ||
|
||
app = typer.Typer(help="OpenQDC CLI") | ||
|
||
|
||
def exist_dataset(dataset):
    """Tell whether ``dataset`` is a key of AVAILABLE_DATASETS.

    An error is logged when the name is unknown.

    Args:
        dataset: Dataset key to look up.

    Returns:
        True if the key is present in AVAILABLE_DATASETS, False otherwise.
    """
    is_known = dataset in AVAILABLE_DATASETS
    if not is_known:
        logger.error(f"{dataset} is not available. Please open an issue on Github for the team to look into it.")
    return is_known
|
||
|
||
@app.command()
def download(
    datasets: List[str],
    overwrite: Annotated[
        bool,
        typer.Option(
            help="Whether to overwrite or force the re-download of the datasets.",
        ),
    ] = False,
    cache_dir: Annotated[
        Optional[str],
        typer.Option(
            help="Path to the cache. If not provided, the default cache directory (.cache/openqdc/) will be used.",
        ),
    ] = None,
):
    """
    Download preprocessed ml-ready datasets from the main openQDC hub.
    Example:
        openqdc download Spice QMugs
    """
    # Normalise every requested name up front: lowercase, underscores removed,
    # which is the form used for the AVAILABLE_DATASETS keys.
    requested = [raw_name.lower().replace("_", "") for raw_name in datasets]

    for dataset in requested:
        # Unknown names are reported by exist_dataset() and skipped.
        if not exist_dataset(dataset):
            continue

        dataset_cls = AVAILABLE_DATASETS[dataset]
        # NOTE(review): the cache check runs on a no_init() instance that is not
        # given cache_dir - confirm it looks at the intended location.
        already_cached = dataset_cls.no_init().is_cached()
        if already_cached and not overwrite:
            logger.info(f"{dataset} is already cached. Skipping download")
            continue

        # Instantiating the dataset class is what performs the download.
        dataset_cls(overwrite_local_cache=True, cache_dir=cache_dir)
|
||
|
||
@app.command()
def datasets():
    """
    Print a table of the available openQDC datasets and some informations.
    """
    table = PrettyTable(["Name", "Forces", "Level of theory"])
    for name, dataset_cls in AVAILABLE_DATASETS.items():
        # no_init() gives an instance whose class-level metadata can be read.
        stub = dataset_cls.no_init()
        # "Forces" column: truthiness of the dataset's __force_methods__.
        forces_available = bool(stub.__force_methods__)
        theory_levels = ",".join(stub.__energy_methods__)
        table.add_row([name, forces_available, theory_levels])
    table.align = "l"
    print(table)
|
||
|
||
@app.command()
def fetch(datasets: List[str]):
    """
    Download the raw datasets files from the main openQDC hub.
    Special case: if the dataset is "all", all available datasets will be downloaded.
    Example:
        openqdc fetch Spice
    """
    # Guard against an empty request: indexing datasets[0] below would
    # otherwise raise IndexError when the function is called with no names.
    if not datasets:
        logger.warning("No dataset name provided. Nothing to fetch.")
        return

    # Only the first argument is checked for the "all" keyword.
    if datasets[0] == "all":
        dataset_names = DataConfigFactory.available_datasets
    else:
        dataset_names = datasets

    for dataset_name in dataset_names:
        # A fresh downloader is created for each dataset, as in the original.
        dd = DataDownloader()
        dd.from_name(dataset_name)
|
||
|
||
if __name__ == "__main__": | ||
app() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from .base import BaseDataset # noqa | ||
from .interaction import DES # noqa | ||
from .potential.ani import ANI1, ANI1CCX, ANI1X # noqa | ||
from .potential.comp6 import COMP6 # noqa | ||
from .potential.dummy import Dummy # noqa | ||
from .potential.gdml import GDML # noqa | ||
from .potential.geom import GEOM # noqa | ||
from .potential.iso_17 import ISO17 # noqa | ||
from .potential.molecule3d import Molecule3D # noqa | ||
from .potential.nabladft import NablaDFT # noqa | ||
from .potential.orbnet_denali import OrbnetDenali # noqa | ||
from .potential.pcqm import PCQM_B3LYP, PCQM_PM6 # noqa | ||
from .potential.qm7x import QM7X # noqa | ||
from .potential.qmugs import QMugs # noqa | ||
from .potential.sn2_rxn import SN2RXN # noqa | ||
from .potential.solvated_peptides import SolvatedPeptides # noqa | ||
from .potential.spice import Spice # noqa | ||
from .potential.tmqm import TMQM # noqa | ||
from .potential.transition1x import Transition1X # noqa | ||
from .potential.waterclusters3_30 import WaterClusters # noqa | ||
|
||
# Lookup table from dataset key to dataset class.
# Keys are the class names lowercased with underscores removed, which is the
# normalisation the `download` CLI command applies to user input before
# looking a dataset up here.
AVAILABLE_DATASETS = {
    "ani1": ANI1,
    "ani1ccx": ANI1CCX,
    "ani1x": ANI1X,
    "comp6": COMP6,
    "des": DES,
    "gdml": GDML,
    "geom": GEOM,
    "iso17": ISO17,
    "molecule3d": Molecule3D,
    "nabladft": NablaDFT,
    "orbnetdenali": OrbnetDenali,
    "pcqmb3lyp": PCQM_B3LYP,
    "pcqmpm6": PCQM_PM6,
    "qm7x": QM7X,
    "qmugs": QMugs,
    "sn2rxn": SN2RXN,
    "solvatedpeptides": SolvatedPeptides,
    "spice": Spice,
    "tmqm": TMQM,
    "transition1x": Transition1X,
    # Historical key, kept so existing callers keep working.
    "watercluster": WaterClusters,
    # Alias matching the normalised class name ("WaterClusters" -> "waterclusters"),
    # so `openqdc download WaterClusters` resolves like every other dataset.
    "waterclusters": WaterClusters,
}
Oops, something went wrong.