"""Lazy-loading entry point for the ``openqdc.datasets.interaction`` package.

Public dataset classes are imported on first attribute access (PEP 562), so
importing the package itself stays cheap.
"""
import importlib
import os
from typing import TYPE_CHECKING  # noqa F401

# The below lazy import logic is coming from openff-toolkit:
# https://github.com/openforcefield/openff-toolkit/blob/b52879569a0344878c40248ceb3bd0f90348076a/openff/toolkit/__init__.py#L44

# Dictionary of objects to lazily import; maps the object's name to its module path
_lazy_imports_obj = {
    "BaseInteractionDataset": "openqdc.datasets.interaction.base",
    "DES370K": "openqdc.datasets.interaction.des370k",
    "Metcalf": "openqdc.datasets.interaction.metcalf",
}

# Dictionary of modules to lazily import; maps the module's name to its path
_lazy_imports_mod = {}


def __getattr__(name):
    """Lazily import objects from _lazy_imports_obj or _lazy_imports_mod.

    Note that this function is only called by Python if the name cannot be
    found in the current module.

    Raises:
        AttributeError: if ``name`` is registered in neither lookup table.
    """
    obj_mod = _lazy_imports_obj.get(name)
    if obj_mod is not None:
        mod = importlib.import_module(obj_mod)
        return mod.__dict__[name]

    lazy_mod = _lazy_imports_mod.get(name)
    if lazy_mod is not None:
        return importlib.import_module(lazy_mod)

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


def __dir__():
    """Add _lazy_imports_obj and _lazy_imports_mod to dir()."""
    keys = (*globals().keys(), *_lazy_imports_obj.keys(), *_lazy_imports_mod.keys())
    return sorted(keys)


if TYPE_CHECKING or os.environ.get("OPENQDC_DISABLE_LAZY_LOADING", "0") == "1":
    from .base import BaseInteractionDataset
    from .des370k import DES370K

    # Metcalf is advertised in __all__ below, so it must be imported eagerly
    # as well; otherwise static type checkers cannot resolve the name.
    from .metcalf import Metcalf

    __all__ = [
        "BaseInteractionDataset",
        "DES370K",
        "Metcalf",
    ]
from typing import Dict, List, Optional, Union

import numpy as np
from loguru import logger

from openqdc.datasets.potential.base import BaseDataset
from openqdc.utils.io import (
    copy_exists,
    dict_to_atoms,
    get_local_cache,
    load_hdf5_file,
    load_pkl,
    pull_locally,
    push_remote,
    set_cache_dir,
)


class BaseInteractionDataset(BaseDataset):
    """Common base class for the interaction datasets.

    It forwards construction to ``BaseDataset`` unchanged and overrides
    ``collate_list`` so that dict-valued entry fields are left out of the
    collated result.
    """

    def __init__(
        self,
        energy_unit: Optional[str] = None,
        distance_unit: Optional[str] = None,
        overwrite_local_cache: bool = False,
        cache_dir: Optional[str] = None,
    ) -> None:
        """Pass every option straight through to ``BaseDataset.__init__``."""
        super().__init__(
            energy_unit=energy_unit,
            distance_unit=distance_unit,
            overwrite_local_cache=overwrite_local_cache,
            cache_dir=cache_dir,
        )

    def collate_list(self, list_entries: List[Dict]) -> Dict[str, np.ndarray]:
        """Merge per-entry dictionaries into one dictionary of stacked arrays.

        Args:
            list_entries: entries as produced by ``read_raw_entries``. ``None``
                items are skipped when concatenating, but the first item is
                used as the key template and therefore must be a dict.

        Returns:
            A dict with one array per non-dict key (entries concatenated along
            axis 0) plus ``"position_idx_range"``: an ``(n_entries, 2)`` int32
            array whose rows hold the start and end offsets obtained from the
            running sum of ``n_atoms``.
        """
        template = list_entries[0]
        # Dict-valued fields cannot be concatenated, so they are left out.
        res = {
            key: np.concatenate([r[key] for r in list_entries if r is not None], axis=0)
            for key in template
            if not isinstance(template[key], dict)
        }

        # Row i spans [csum[i - 1], csum[i]); the first row starts at 0.
        csum = np.cumsum(res.get("n_atoms"))
        x = np.zeros((csum.shape[0], 2), dtype=np.int32)
        x[1:, 0], x[:, 1] = csum[:-1], csum
        res["position_idx_range"] = x

        return res
"sapt0/aug-cc-pwcvxz_ex", + "sapt0/aug-cc-pwcvxz_exs2", + "sapt0/aug-cc-pwcvxz_ind", + "sapt0/aug-cc-pwcvxz_exind", + "sapt0/aug-cc-pwcvxz_disp", + "sapt0/aug-cc-pwcvxz_exdisp_os", + "sapt0/aug-cc-pwcvxz_exdisp_ss", + "sapt0/aug-cc-pwcvxz_delta_HF", + ] + + energy_target_names = [ + "cc_MP2_all", + "qz_MP2_all", + "tz_MP2_all", + "cbs_MP2_all", + "cc_CCSD(T)_all", + "cbs_CCSD(T)_all", + "nn_CCSD(T)_all", + "sapt_all", + "sapt_es", + "sapt_ex", + "sapt_exs2", + "sapt_ind", + "sapt_exind", + "sapt_disp", + "sapt_exdisp_os", + "sapt_exdisp_ss", + "sapt_delta_HF", + ] + + def read_raw_entries(self) -> List[Dict]: + self.filepath = os.path.join(self.root, "DES370K.csv") + logger.info(f"Reading DES370K interaction data from {self.filepath}") + df = pd.read_csv(self.filepath) + data = [] + for idx, row in tqdm(df.iterrows(), total=df.shape[0]): + smiles0, smiles1 = row["smiles0"], row["smiles1"] + charge0, charge1 = row["charge0"], row["charge1"] + natoms0, natoms1 = row["natoms0"], row["natoms1"] + pos = np.array(list(map(float, row["xyz"].split()))).reshape(-1, 3) + pos0 = pos[:natoms0] + pos1 = pos[natoms0:] + + elements = row["elements"].split() + elements0 = np.array(elements[:natoms0]) + elements1 = np.array(elements[natoms0:]) + + atomic_nums = np.expand_dims(np.array([atom_table.GetAtomicNumber(x) for x in elements]), axis=1) + atomic_nums0 = np.array(atomic_nums[:natoms0]) + atomic_nums1 = np.array(atomic_nums[natoms0:]) + + charges = np.expand_dims(np.array([charge0] * natoms0 + [charge1] * natoms1), axis=1) + + atomic_inputs = np.concatenate((atomic_nums, charges, pos), axis=-1, dtype=np.float32) + atomic_inputs0 = atomic_inputs[:natoms0, :] + atomic_inputs1 = atomic_inputs[natoms0:, :] + + energies = np.array(row[self.energy_target_names].values).astype(np.float32)[None, :] + + name = np.array([smiles0 + "." 
+ smiles1]) + + item = dict( + energies=energies, + subset=np.array(["DES370K"]), + n_atoms=np.array([natoms0 + natoms1], dtype=np.int32), + n_atoms_first=np.array([natoms0], dtype=np.int32), + atomic_inputs=atomic_inputs, + name=name, + ) + data.append(item) + return data diff --git a/src/openqdc/datasets/interaction/metcalf.py b/src/openqdc/datasets/interaction/metcalf.py new file mode 100644 index 0000000..3a8e714 --- /dev/null +++ b/src/openqdc/datasets/interaction/metcalf.py @@ -0,0 +1,91 @@ +import os +import numpy as np + +from typing import Dict, List + +from tqdm import tqdm +from rdkit import Chem +from loguru import logger +from openqdc.datasets.interaction import BaseInteractionDataset +from openqdc.utils.molecule import atom_table + +class Metcalf(BaseInteractionDataset): + __name__ = "metcalf" + __energy_unit__ = "hartree" + __distance_unit__ = "ang" + __forces_unit__ = None + __energy_methods__ = [ + "SAPT0/jun-cc-pVDZ" + ] + energy_target_names = [ + "total energy", + "electrostatic energy", + "exchange energy", + "induction energy", + "dispersion energy", + ] + + def read_raw_entries(self) -> List[Dict]: + """ + SMILES strings are inferred from the + .xyz files using the RDKit xyz2mol function. 
import os
from typing import Dict, List

import numpy as np
from loguru import logger
from rdkit import Chem
from tqdm import tqdm

from openqdc.datasets.interaction import BaseInteractionDataset
from openqdc.utils.molecule import atom_table


class Metcalf(BaseInteractionDataset):
    """Metcalf interaction dataset, read from directories of ``.xyz`` files."""

    __name__ = "metcalf"
    __energy_unit__ = "hartree"
    __distance_unit__ = "ang"
    __forces_unit__ = None
    # NOTE(review): one method label is declared here, but every entry stores
    # five energy columns (see energy_target_names) -- confirm that the base
    # class does not require the two lists to have equal length.
    __energy_methods__ = ["SAPT0/jun-cc-pVDZ"]
    energy_target_names = [
        "total energy",
        "electrostatic energy",
        "exchange energy",
        "induction energy",
        "dispersion energy",
    ]

    def read_raw_entries(self) -> List[Dict]:
        """Read every ``.xyz`` file in each sub-directory of ``self.root``.

        Layout read from each file:
            line 1: total number of atoms;
            line 2: comma-separated fields, of which fields 1..5 are read as
                the five energies;
            remaining lines: ``<element> <x> <y> <z>``.

        Each returned dict holds ``n_atoms``, ``subset`` (lower-cased
        directory-name prefix before the first ``-``), ``energies``
        (``(1, 5)`` float32), ``positions`` (``(n_atoms, 3)`` float32),
        ``atomic_inputs`` (``(n_atoms, 5)`` float32: atomic number, charge,
        x, y, z, with charge always 0) and ``name``.

        SMILES inference is not implemented: ``name`` is an empty-string
        placeholder for every entry.
        """
        data = []
        for dirname in os.listdir(self.root):
            xyz_dir = os.path.join(self.root, dirname)
            if not os.path.isdir(xyz_dir):
                continue
            subset = np.array([dirname.split("-")[0].lower()])  # training, validation, or test
            for filename in os.listdir(xyz_dir):
                if not filename.endswith(".xyz"):
                    continue
                # Context manager so the handle is closed after each file.
                with open(os.path.join(xyz_dir, filename), "r") as xyz_file:
                    lines = [line.strip() for line in xyz_file]

                line_two = lines[1].split(",")
                energies = np.array([line_two[1:6]], dtype=np.float32)
                num_atoms = np.array([int(lines[0])])
                # NOTE(review): the last field of line two appears to be the
                # atom count of the first molecule; it is not stored here,
                # unlike DES370K's n_atoms_first -- confirm whether it should be.

                # Skip blank lines (e.g. a trailing newline) so the rows stay
                # rectangular.
                elem_xyz = np.array([x.split() for x in lines[2:] if x])
                elements = elem_xyz[:, 0]
                xyz = elem_xyz[:, 1:].astype(np.float32)

                atomic_nums = np.expand_dims(np.array([atom_table.GetAtomicNumber(x) for x in elements]), axis=1)
                charges = np.expand_dims(np.array([0] * num_atoms[0]), axis=1)
                atomic_inputs = np.concatenate((atomic_nums, charges, xyz), axis=-1, dtype=np.float32)

                item = dict(
                    n_atoms=num_atoms,
                    subset=subset,
                    energies=energies,
                    positions=xyz,
                    atomic_inputs=atomic_inputs,
                    name=np.array([""]),
                )
                data.append(item)
        return data
"openqdc.datasets.potential.gdml", + "Molecule3D": "openqdc.datasets.potential.molecule3d", + "OrbnetDenali": "openqdc.datasets.potential.orbnet_denali", + "SN2RXN": "openqdc.datasets.potential.sn2_rxn", + "QM7X": "openqdc.datasets.potential.qm7x", + "DESS": "openqdc.datasets.potential.dess", + "NablaDFT": "openqdc.datasets.potential.nabladft", + "SolvatedPeptides": "openqdc.datasets.potential.solvated_peptides", + "WaterClusters": "openqdc.datasets.potential.waterclusters3_30", + "TMQM": "openqdc.datasets.potential.tmqm", + "Dummy": "openqdc.datasets.potential.dummy", + "PCQM_B3LYP": "openqdc.datasets.potential.pcqm", + "PCQM_PM6": "openqdc.datasets.potential.pcqm", + "Transition1X": "openqdc.datasets.potential.transition1x", } _lazy_imports_mod = {} diff --git a/src/openqdc/datasets/ani.py b/src/openqdc/datasets/potential/ani.py similarity index 100% rename from src/openqdc/datasets/ani.py rename to src/openqdc/datasets/potential/ani.py diff --git a/src/openqdc/datasets/base.py b/src/openqdc/datasets/potential/base.py similarity index 99% rename from src/openqdc/datasets/base.py rename to src/openqdc/datasets/potential/base.py index adf9ae6..447985a 100644 --- a/src/openqdc/datasets/base.py +++ b/src/openqdc/datasets/potential/base.py @@ -125,6 +125,10 @@ def _post_init( self._convert_data() self._set_isolated_atom_energies() + @classmethod + def no_init(cls): + return cls.__new__(cls) + def _convert_data(self): logger.info( f"Converting {self.__name__} data to the following units:\n\ @@ -325,6 +329,7 @@ def read_raw_entries(self): def collate_list(self, list_entries): # concatenate entries + logger.info(f"list entries: {type(list_entries)}") res = {key: np.concatenate([r[key] for r in list_entries if r is not None], axis=0) for key in list_entries[0]} csum = np.cumsum(res.get("n_atoms")) diff --git a/src/openqdc/datasets/comp6.py b/src/openqdc/datasets/potential/comp6.py similarity index 96% rename from src/openqdc/datasets/comp6.py rename to 
src/openqdc/datasets/potential/comp6.py index 7b6890b..ac37094 100644 --- a/src/openqdc/datasets/comp6.py +++ b/src/openqdc/datasets/potential/comp6.py @@ -1,6 +1,6 @@ from os.path import join as p_join -from openqdc.datasets.base import BaseDataset, read_qc_archive_h5 +from openqdc.datasets.potential.base import BaseDataset, read_qc_archive_h5 class COMP6(BaseDataset): diff --git a/src/openqdc/datasets/dess.py b/src/openqdc/datasets/potential/dess.py similarity index 95% rename from src/openqdc/datasets/dess.py rename to src/openqdc/datasets/potential/dess.py index 80b1e1c..fd36e07 100644 --- a/src/openqdc/datasets/dess.py +++ b/src/openqdc/datasets/potential/dess.py @@ -5,7 +5,7 @@ import pandas as pd from tqdm import tqdm -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.molecule import get_atomic_number_and_charge @@ -58,7 +58,6 @@ class DESS(BaseDataset): "nn_CCSD(T)_all", "sapt_all", ] - # ['qz_MP2_all', 'tz_MP2_all', 'cbs_MP2_all', 'sapt_all', 'nn_CCSD(T)_all'] partitions = ["DES370K", "DES5M"] diff --git a/src/openqdc/datasets/dummy.py b/src/openqdc/datasets/potential/dummy.py similarity index 97% rename from src/openqdc/datasets/dummy.py rename to src/openqdc/datasets/potential/dummy.py index c87e03d..b4bd2e8 100644 --- a/src/openqdc/datasets/dummy.py +++ b/src/openqdc/datasets/potential/dummy.py @@ -2,7 +2,7 @@ from numpy import array from sklearn.utils import Bunch -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.atomization_energies import IsolatedAtomEnergyFactory from openqdc.utils.constants import NOT_DEFINED diff --git a/src/openqdc/datasets/gdml.py b/src/openqdc/datasets/potential/gdml.py similarity index 95% rename from src/openqdc/datasets/gdml.py rename to src/openqdc/datasets/potential/gdml.py index e40b3fa..67f6bde 100644 --- a/src/openqdc/datasets/gdml.py +++ 
b/src/openqdc/datasets/potential/gdml.py @@ -1,6 +1,6 @@ from os.path import join as p_join -from openqdc.datasets.base import BaseDataset, read_qc_archive_h5 +from openqdc.datasets.potential.base import BaseDataset, read_qc_archive_h5 class GDML(BaseDataset): diff --git a/src/openqdc/datasets/geom.py b/src/openqdc/datasets/potential/geom.py similarity index 98% rename from src/openqdc/datasets/geom.py rename to src/openqdc/datasets/potential/geom.py index c016a9f..8b87ead 100644 --- a/src/openqdc/datasets/geom.py +++ b/src/openqdc/datasets/potential/geom.py @@ -4,7 +4,7 @@ import datamol as dm import numpy as np -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils import load_json, load_pkl from openqdc.utils.molecule import get_atomic_number_and_charge diff --git a/src/openqdc/datasets/iso_17.py b/src/openqdc/datasets/potential/iso_17.py similarity index 95% rename from src/openqdc/datasets/iso_17.py rename to src/openqdc/datasets/potential/iso_17.py index 4553ec1..1dd5d56 100644 --- a/src/openqdc/datasets/iso_17.py +++ b/src/openqdc/datasets/potential/iso_17.py @@ -1,6 +1,6 @@ from os.path import join as p_join -from openqdc.datasets.base import BaseDataset, read_qc_archive_h5 +from openqdc.datasets.potential.base import BaseDataset, read_qc_archive_h5 class ISO17(BaseDataset): diff --git a/src/openqdc/datasets/molecule3d.py b/src/openqdc/datasets/potential/molecule3d.py similarity index 98% rename from src/openqdc/datasets/molecule3d.py rename to src/openqdc/datasets/potential/molecule3d.py index dc47e53..834525a 100644 --- a/src/openqdc/datasets/molecule3d.py +++ b/src/openqdc/datasets/potential/molecule3d.py @@ -8,7 +8,7 @@ from rdkit import Chem from tqdm import tqdm -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.molecule import get_atomic_number_and_charge diff --git a/src/openqdc/datasets/nabladft.py 
b/src/openqdc/datasets/potential/nabladft.py similarity index 98% rename from src/openqdc/datasets/nabladft.py rename to src/openqdc/datasets/potential/nabladft.py index 0555cdc..703fdc5 100644 --- a/src/openqdc/datasets/nabladft.py +++ b/src/openqdc/datasets/potential/nabladft.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.molecule import z_to_formula from openqdc.utils.package_utils import requires_package diff --git a/src/openqdc/datasets/orbnet_denali.py b/src/openqdc/datasets/potential/orbnet_denali.py similarity index 98% rename from src/openqdc/datasets/orbnet_denali.py rename to src/openqdc/datasets/potential/orbnet_denali.py index 614e252..14c19ef 100644 --- a/src/openqdc/datasets/orbnet_denali.py +++ b/src/openqdc/datasets/potential/orbnet_denali.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.molecule import atom_table diff --git a/src/openqdc/datasets/pcqm.py b/src/openqdc/datasets/potential/pcqm.py similarity index 99% rename from src/openqdc/datasets/pcqm.py rename to src/openqdc/datasets/potential/pcqm.py index 543c494..c86d2b6 100644 --- a/src/openqdc/datasets/pcqm.py +++ b/src/openqdc/datasets/potential/pcqm.py @@ -10,7 +10,7 @@ import pandas as pd from loguru import logger -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.io import get_local_cache, push_remote diff --git a/src/openqdc/datasets/qm7x.py b/src/openqdc/datasets/potential/qm7x.py similarity index 96% rename from src/openqdc/datasets/qm7x.py rename to src/openqdc/datasets/potential/qm7x.py index eb8b015..361707f 100644 --- a/src/openqdc/datasets/qm7x.py +++ b/src/openqdc/datasets/potential/qm7x.py @@ -3,7 +3,7 @@ import numpy as np from tqdm import tqdm 
-from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.io import load_hdf5_file diff --git a/src/openqdc/datasets/qmugs.py b/src/openqdc/datasets/potential/qmugs.py similarity index 97% rename from src/openqdc/datasets/qmugs.py rename to src/openqdc/datasets/potential/qmugs.py index c75f8b5..b7e0a86 100644 --- a/src/openqdc/datasets/qmugs.py +++ b/src/openqdc/datasets/potential/qmugs.py @@ -5,7 +5,7 @@ import datamol as dm import numpy as np -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.molecule import get_atomic_number_and_charge diff --git a/src/openqdc/datasets/sn2_rxn.py b/src/openqdc/datasets/potential/sn2_rxn.py similarity index 95% rename from src/openqdc/datasets/sn2_rxn.py rename to src/openqdc/datasets/potential/sn2_rxn.py index abcbd62..f8e35cc 100644 --- a/src/openqdc/datasets/sn2_rxn.py +++ b/src/openqdc/datasets/potential/sn2_rxn.py @@ -1,6 +1,6 @@ from os.path import join as p_join -from openqdc.datasets.base import BaseDataset, read_qc_archive_h5 +from openqdc.datasets.potential.base import BaseDataset, read_qc_archive_h5 class SN2RXN(BaseDataset): diff --git a/src/openqdc/datasets/solvated_peptides.py b/src/openqdc/datasets/potential/solvated_peptides.py similarity index 93% rename from src/openqdc/datasets/solvated_peptides.py rename to src/openqdc/datasets/potential/solvated_peptides.py index 216ecdd..77d8318 100644 --- a/src/openqdc/datasets/solvated_peptides.py +++ b/src/openqdc/datasets/potential/solvated_peptides.py @@ -1,6 +1,6 @@ from os.path import join as p_join -from openqdc.datasets.base import BaseDataset, read_qc_archive_h5 +from openqdc.datasets.potential.base import BaseDataset, read_qc_archive_h5 class SolvatedPeptides(BaseDataset): diff --git a/src/openqdc/datasets/spice.py b/src/openqdc/datasets/potential/spice.py similarity index 98% rename from src/openqdc/datasets/spice.py rename to 
src/openqdc/datasets/potential/spice.py index 0b90912..d891f6b 100644 --- a/src/openqdc/datasets/spice.py +++ b/src/openqdc/datasets/potential/spice.py @@ -4,7 +4,7 @@ import numpy as np from tqdm import tqdm -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils import load_hdf5_file from openqdc.utils.molecule import get_atomic_number_and_charge diff --git a/src/openqdc/datasets/tmqm.py b/src/openqdc/datasets/potential/tmqm.py similarity index 97% rename from src/openqdc/datasets/tmqm.py rename to src/openqdc/datasets/potential/tmqm.py index 8952aaa..b613929 100644 --- a/src/openqdc/datasets/tmqm.py +++ b/src/openqdc/datasets/potential/tmqm.py @@ -5,7 +5,7 @@ import pandas as pd from tqdm import tqdm -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.molecule import atom_table diff --git a/src/openqdc/datasets/transition1x.py b/src/openqdc/datasets/potential/transition1x.py similarity index 97% rename from src/openqdc/datasets/transition1x.py rename to src/openqdc/datasets/potential/transition1x.py index 0285ec9..c266325 100644 --- a/src/openqdc/datasets/transition1x.py +++ b/src/openqdc/datasets/potential/transition1x.py @@ -3,7 +3,7 @@ import numpy as np from tqdm import tqdm -from openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.constants import NB_ATOMIC_FEATURES from openqdc.utils.io import load_hdf5_file diff --git a/src/openqdc/datasets/waterclusters3_30.py b/src/openqdc/datasets/potential/waterclusters3_30.py similarity index 97% rename from src/openqdc/datasets/waterclusters3_30.py rename to src/openqdc/datasets/potential/waterclusters3_30.py index 6aa5748..ce0e5de 100644 --- a/src/openqdc/datasets/waterclusters3_30.py +++ b/src/openqdc/datasets/potential/waterclusters3_30.py @@ -4,7 +4,7 @@ import numpy as np from tqdm import tqdm -from 
openqdc.datasets.base import BaseDataset +from openqdc.datasets.potential.base import BaseDataset from openqdc.utils.constants import MAX_ATOMIC_NUMBER from openqdc.utils.molecule import atom_table diff --git a/src/openqdc/raws/config_factory.py b/src/openqdc/raws/config_factory.py index c8dddba..df54307 100644 --- a/src/openqdc/raws/config_factory.py +++ b/src/openqdc/raws/config_factory.py @@ -90,6 +90,13 @@ class DataConfigFactory: }, ) + des370k_interaction = dict( + dataset_name="des370k_interaction", + links={ + "DES370K.zip": "https://zenodo.org/record/5676266/files/DES370K.zip", + } + ) + tmqm = dict( dataset_name="tmqm", links={