Skip to content

Commit

Permalink
fix merge conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
mcneela committed Mar 12, 2024
2 parents e969b54 + 0079fc5 commit bc3f5fc
Show file tree
Hide file tree
Showing 13 changed files with 11,406 additions and 107 deletions.
4 changes: 4 additions & 0 deletions openqdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
"PCQM_B3LYP": "openqdc.datasets.potential.pcqm",
"PCQM_PM6": "openqdc.datasets.potential.pcqm",
"Transition1X": "openqdc.datasets.potential.transition1x",
"MultixcQM9": "openqdc.datasets.potential.multixcqm9",
"AVAILABLE_DATASETS": "openqdc.datasets",
"AVAILABLE_POTENTIAL_DATASETS": "openqdc.datasets.potential",
"AVAILABLE_INTERACTION_DATASETS": "openqdc.datasets.interaction",
}

_lazy_imports_mod = {"datasets": "openqdc.datasets", "utils": "openqdc.utils"}
Expand Down Expand Up @@ -75,6 +78,7 @@ def __dir__():
from .datasets.potential.geom import GEOM # noqa
from .datasets.potential.iso_17 import ISO17 # noqa
from .datasets.potential.molecule3d import Molecule3D # noqa
from .datasets.potential.multixcqm9 import MultixcQM9 # noqa
from .datasets.potential.nabladft import NablaDFT # noqa
from .datasets.potential.orbnet_denali import OrbnetDenali # noqa
from .datasets.potential.pcqm import PCQM_B3LYP, PCQM_PM6 # noqa
Expand Down
22 changes: 19 additions & 3 deletions openqdc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from prettytable import PrettyTable
from typing_extensions import Annotated

from openqdc import AVAILABLE_DATASETS
from openqdc import AVAILABLE_DATASETS, AVAILABLE_POTENTIAL_DATASETS
from openqdc.raws.config_factory import DataConfigFactory
from openqdc.raws.fetch import DataDownloader

Expand All @@ -19,6 +19,14 @@ def exist_dataset(dataset):
return True


def format_entry(empty_dataset, max_entries: int = 10):
    """Return a comma-separated summary of a dataset's energy methods.

    Args:
        empty_dataset: Object exposing an ``__energy_methods__`` sequence of
            strings.
        max_entries: Maximum number of method names to list before the
            summary is cut off. Defaults to 10, the previously hard-coded
            limit, so existing callers are unaffected.

    Returns:
        The first ``max_entries`` names joined with ``","``, followed by
        ``"..."`` when further names were omitted.
    """
    methods = empty_dataset.__energy_methods__
    entry = ",".join(methods[:max_entries])
    # Flag truncation so a shortened list is not mistaken for the full one.
    if len(methods) > max_entries:
        entry += "..."
    return entry


@app.command()
def download(
datasets: List[str],
Expand Down Expand Up @@ -54,11 +62,19 @@ def datasets():
"""
Print a table of the available openQDC datasets and some informations.
"""
table = PrettyTable(["Name", "Forces", "Level of theory"])
table = PrettyTable(["Name", "Type of Energy", "Forces", "Level of theory"])
for dataset in AVAILABLE_DATASETS:
empty_dataset = AVAILABLE_DATASETS[dataset].no_init()
has_forces = False if not empty_dataset.__force_methods__ else True
table.add_row([dataset, has_forces, ",".join(empty_dataset.__energy_methods__)])
en_type = "Potential" if dataset in AVAILABLE_POTENTIAL_DATASETS else "Interaction"
table.add_row(
[
dataset,
en_type,
has_forces,
format_entry(empty_dataset),
]
)
table.align = "l"
print(table)

Expand Down
27 changes: 5 additions & 22 deletions openqdc/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from .base import BaseDataset # noqa
from .interaction import AVAILABLE_INTERACTION_DATASETS # noqa
from .interaction import DES # noqa
from .potential import AVAILABLE_POTENTIAL_DATASETS # noqa
from .potential.ani import ANI1, ANI1CCX, ANI1X # noqa
from .potential.comp6 import COMP6 # noqa
from .potential.dummy import Dummy # noqa
from .potential.gdml import GDML # noqa
from .potential.geom import GEOM # noqa
from .potential.iso_17 import ISO17 # noqa
from .potential.molecule3d import Molecule3D # noqa
from .potential.multixcqm9 import MultixcQM9 # noqa
from .potential.nabladft import NablaDFT # noqa
from .potential.orbnet_denali import OrbnetDenali # noqa
from .potential.pcqm import PCQM_B3LYP, PCQM_PM6 # noqa
Expand All @@ -18,25 +22,4 @@
from .potential.transition1x import Transition1X # noqa
from .potential.waterclusters3_30 import WaterClusters # noqa

AVAILABLE_DATASETS = {
"ani1": ANI1,
"ani1ccx": ANI1CCX,
"ani1x": ANI1X,
"comp6": COMP6,
"gdml": GDML,
"geom": GEOM,
"iso17": ISO17,
"molecule3d": Molecule3D,
"nabladft": NablaDFT,
"orbnetdenali": OrbnetDenali,
"pcqmb3lyp": PCQM_B3LYP,
"pcqmpm6": PCQM_PM6,
"qm7x": QM7X,
"qmugs": QMugs,
"sn2rxn": SN2RXN,
"solvatedpeptides": SolvatedPeptides,
"spice": Spice,
"tmqm": TMQM,
"transition1x": Transition1X,
"watercluster": WaterClusters,
}
AVAILABLE_DATASETS = {**AVAILABLE_POTENTIAL_DATASETS, **AVAILABLE_INTERACTION_DATASETS}
2 changes: 1 addition & 1 deletion openqdc/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def read_preprocess(self, overwrite_local_cache=False):
for key in self.data_keys:
filename = p_join(self.preprocess_path, f"{key}.mmap")
pull_locally(filename, overwrite=overwrite_local_cache)
self.data[key] = np.memmap(filename, mode="r", dtype=self.data_types[key]).reshape(self.data_shapes[key])
self.data[key] = np.memmap(filename, mode="r", dtype=self.data_types[key]).reshape(*self.data_shapes[key])

filename = p_join(self.preprocess_path, "props.pkl")
pull_locally(filename, overwrite=overwrite_local_cache)
Expand Down
86 changes: 18 additions & 68 deletions openqdc/datasets/interaction/__init__.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,19 @@
import importlib
import os
from typing import TYPE_CHECKING # noqa F401

# The lazy import logic below comes from openff-toolkit:
# https://github.com/openforcefield/openff-toolkit/blob/b52879569a0344878c40248ceb3bd0f90348076a/openff/toolkit/__init__.py#L44

# Dictionary of objects to lazily import; maps the object's name to its module path

_lazy_imports_obj = {
"BaseInteractionDataset": "openqdc.datasets.interaction.base",
"DES370K": "openqdc.datasets.interaction.des370k",
"DES5M": "openqdc.datasets.interaction.des5m",
"Metcalf": "openqdc.datasets.interaction.metcalf",
"DESS66": "openqdc.datasets.interaction.dess66",
"DESS66x8": "openqdc.datasets.interaction.dess66x8",
"L7": "openqdc.datasets.interaction.L7",
"X40": "openqdc.datasets.interaction.X40",
"Splinter": "openqdc.datasets.interaction.splinter",
from .des5m import DES5M
from .des370k import DES370K
from .dess66 import DESS66
from .dess66x8 import DESS66x8
from .L7 import L7
from .metcalf import Metcalf
from .splinter import Splinter
from .X40 import X40

AVAILABLE_INTERACTION_DATASETS = {
"des370k": DES370K,
"des5m": DES5M,
"dess66": DESS66,
"dess66x8": DESS66x8,
"L7": L7,
"metcalf": Metcalf,
"splinter": Splinter,
"X40": X40,
}

_lazy_imports_mod = {}


def __getattr__(name):
    """Resolve ``name`` lazily the first time it is requested on this module.

    Python only invokes this hook when ``name`` cannot be found in the
    module's own namespace. Objects registered in ``_lazy_imports_obj`` are
    looked up first; modules registered in ``_lazy_imports_mod`` second.

    Raises:
        AttributeError: If ``name`` is registered in neither mapping.
    """
    object_module_path = _lazy_imports_obj.get(name)
    if object_module_path is None:
        module_path = _lazy_imports_mod.get(name)
        if module_path is None:
            raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
        # A lazily loaded submodule: hand back the module itself.
        return importlib.import_module(module_path)

    # A lazily loaded object: import its home module and pull the object out.
    return vars(importlib.import_module(object_module_path))[name]


def __dir__():
    """List the module's names, including the lazily importable ones.

    Returns the sorted names from ``globals()`` together with the keys of
    ``_lazy_imports_obj`` and ``_lazy_imports_mod``.
    """
    names = list(globals())
    names.extend(_lazy_imports_obj)
    names.extend(_lazy_imports_mod)
    names.sort()
    return names


if TYPE_CHECKING or os.environ.get("OPENQDC_DISABLE_LAZY_LOADING", "0") == "1":
from .base import BaseInteractionDataset
from .des5m import DES5M
from .des370k import DES370K
from .dess66 import DESS66
from .dess66x8 import DESS66x8
from .L7 import L7
from .metcalf import Metcalf
from .splinter import Splinter
from .X40 import X40

__all__ = [
"BaseInteractionDataset",
"DES370K",
"DES5M",
"Metcalf",
"DESS66",
"DESS66x8",
"L7",
"X40",
"Splinter",
]
4 changes: 3 additions & 1 deletion openqdc/datasets/potential/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .geom import GEOM # noqa
from .iso_17 import ISO17 # noqa
from .molecule3d import Molecule3D # noqa
from .multixcqm9 import MultixcQM9 # noqa
from .nabladft import NablaDFT # noqa
from .orbnet_denali import OrbnetDenali # noqa
from .pcqm import PCQM_B3LYP, PCQM_PM6 # noqa
Expand All @@ -17,7 +18,7 @@
from .transition1x import Transition1X # noqa
from .waterclusters3_30 import WaterClusters # noqa

AVAILABLE_DATASETS = {
AVAILABLE_POTENTIAL_DATASETS = {
"ani1": ANI1,
"ani1ccx": ANI1CCX,
"ani1x": ANI1X,
Expand All @@ -38,4 +39,5 @@
"tmqm": TMQM,
"transition1x": Transition1X,
"watercluster": WaterClusters,
"multixcqm9": MultixcQM9,
}
Loading

0 comments on commit bc3f5fc

Please sign in to comment.