Skip to content

Commit

Permalink
Merge branch 'refactoring' of https://github.com/OpenDrugDiscovery/op…
Browse files Browse the repository at this point in the history
…enQDC into refactoring
  • Loading branch information
FNTwin committed Mar 8, 2024
2 parents 1e6652f + 088d457 commit b297cef
Showing 1 changed file with 43 additions and 28 deletions.
71 changes: 43 additions & 28 deletions openqdc/datasets/potential/dummy.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import numpy as np # noqa
from numpy import array
from sklearn.utils import Bunch
import numpy as np

from openqdc.datasets.base import BaseDataset
from openqdc.utils.atomization_energies import IsolatedAtomEnergyFactory
from openqdc.utils.constants import NOT_DEFINED


class Dummy(BaseDataset):
"""
Dummy dataset
Dummy dataset for testing.
"""

__name__ = "dummy"
Expand All @@ -30,21 +27,26 @@ def _stats(self):
return {
"formation": {
"energy": {
"mean": array([[-12.94348027, -9.83037297]]),
"std": array([[4.39971409, 3.3574188]]),
"mean": np.array([[-12.94348027, -9.83037297]]),
"std": np.array([[4.39971409, 3.3574188]]),
},
"forces": NOT_DEFINED,
},
"total": {
"energy": {
"mean": array([[-89.44242, -1740.5336]]),
"std": array([[29.599571, 791.48663]]),
"mean": np.array([[-89.44242, -1740.5336]]),
"std": np.array([[29.599571, 791.48663]]),
},
"forces": NOT_DEFINED,
},
}

def __init__(self, energy_unit=None, distance_unit=None, cache_dir=None) -> None:
def __init__(
self,
energy_unit=None,
distance_unit=None,
cache_dir=None,
) -> None:
try:
super().__init__(energy_unit=energy_unit, distance_unit=distance_unit, cache_dir=cache_dir)

Expand All @@ -54,8 +56,37 @@ def __init__(self, energy_unit=None, distance_unit=None, cache_dir=None) -> None
self.setup_dummy()

def setup_dummy(self):
# Fill the dataset with randomly generated entries so no real data has to be read.
# NOTE(review): this text is a diff view whose +/- markers and indentation were lost.
# The next two lines look like the pre-change implementation (per-entry atom counts
# kept on self._n_atoms) and the rest like its replacement -- confirm against the repo.
self._n_atoms = np.array([np.random.randint(1, 100) for _ in range(self.__len__())])
self.__average_nb_atoms__ = self._n_atoms.mean()
# One random atom count in [1, 100) per entry; len(self) entries in total.
n_atoms = np.array([np.random.randint(1, 100) for _ in range(len(self))])
# Build one (start, end) row per entry from the running total of atom counts:
# [0, c1, ..., cN] -> repeat each value twice -> drop first and last -> pair up,
# giving [[0, c1], [c1, c2], ...], i.e. offsets into the flattened per-atom arrays.
position_idx_range = np.concatenate([[0], np.cumsum(n_atoms)]).repeat(2)[1:-1].reshape(-1, 2)
# For every entry build a (size, 5) block of per-atom values and stack all
# blocks along axis 0.
atomic_inputs = np.concatenate(
[
np.concatenate(
[
# z, c, x, y, z
# Column 0: random int in [1, 100); column 1: random int in {-1, 0, 1};
# columns 2-4: standard-normal samples.
# presumably atomic number, charge and 3D coordinates -- TODO confirm
np.random.randint(1, 100, size=(size, 1)),
np.random.randint(-1, 2, size=(size, 1)),
np.random.randn(size, 3),
],
axis=1,
)
for size in n_atoms
],
axis=0,
) # (sum(n_atoms), 5)
# Per-entry labels: a unique "dummy_<i>" name and the constant subset "dummy".
name = [f"dummy_{i}" for i in range(len(self))]
subset = ["dummy" for i in range(len(self))]
# Uniform [0, 1) energies, one column per entry in self.__energy_methods__.
energies = np.random.rand(len(self), len(self.__energy_methods__))
# Per-atom standard-normal values scaled by 100, shape (size, 3, number of force
# methods) per entry, concatenated along axis 0 (the np.concatenate default).
forces = np.concatenate([np.random.randn(size, 3, len(self.__force_methods__)) * 100 for size in n_atoms])
# Store all generated arrays and lists on self.data under fixed keys.
self.data = dict(
n_atoms=n_atoms,
position_idx_range=position_idx_range,
name=name,
atomic_inputs=atomic_inputs,
subset=subset,
energies=energies,
forces=forces,
)
# Mean number of atoms per entry, computed from the array just stored.
self.__average_nb_atoms__ = self.data["n_atoms"].mean()

def is_preprocessed(self):
# Always reports True, unconditionally.
# presumably lets the base class skip a preprocessing step -- TODO confirm against BaseDataset
return True
Expand All @@ -65,19 +96,3 @@ def read_raw_entries(self):

def __len__(self):
# Fixed dataset size; setup_dummy generates this many random entries via len(self).
return 9999

def __getitem__(self, idx: int):
# Build a random entry for index idx and return it as a Bunch.
# NOTE(review): in this diff view the method appears to be on the removed side of
# the change (it reads self._n_atoms, set only by the older setup_dummy lines) -- confirm.
# Every call draws fresh random numbers, so the same idx yields different values each time.
# Offset added to the charge so it can be used as an index on the last axis of the
# isolated-atom energy table (charges drawn below include -1).
shift = IsolatedAtomEnergyFactory.max_charge
# Number of atoms chosen for this entry.
size = self._n_atoms[idx]
# Per-atom random ints in [1, 100), returned as atomic_numbers.
z = np.random.randint(1, 100, size)
# Per-atom random ints in {-1, 0, 1}, returned as charges.
c = np.random.randint(-1, 2, size)
return Bunch(
# Uniform samples in [0, 10), shape (size, 3).
positions=np.random.rand(size, 3) * 10,
atomic_numbers=z,
charges=c,
# Look up one table value per atom by (z, shifted charge), then transpose.
e0=self.__isolated_atom_energies__[..., z, c + shift].T,
# One standard-normal value per entry in self.__energy_methods__.
energies=np.random.randn(len(self.__energy_methods__)),
name="dummy_{}".format(idx),
subset="dummy",
# Standard-normal values scaled by 100, shape (size, 3, number of force methods).
forces=(np.random.randn(size, 3, len(self.__force_methods__)) * 100),
)

0 comments on commit b297cef

Please sign in to comment.