Skip to content

Commit

Permalink
Updated docs for spice, iso17, nabladft
Browse files Browse the repository at this point in the history
  • Loading branch information
shenoynikhil committed Oct 5, 2023
1 parent 99a3506 commit bf3c08a
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 1 deletion.
25 changes: 25 additions & 0 deletions src/openqdc/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from .comp6 import COMP6
from .gdml import GDML
from .geom import GEOM
from .iso_17 import ISO17
from .molecule3d import Molecule3D
from .nabladft import NablaDFT
from .orbnet_denali import OrbnetDenali
from .qmugs import QMugs
from .sn2_rxn import SN2RXN
from .spice import Spice

# Public API of the datasets package: exactly one entry per dataset class
# imported in this module, listed alphabetically (case-insensitive) so that
# accidental duplicates or omissions are easy to spot in review.
__all__ = [
    "COMP6",
    "GDML",
    "GEOM",
    "ISO17",
    "Molecule3D",
    "NablaDFT",
    "OrbnetDenali",
    "QMugs",
    "SN2RXN",
    "Spice",
]
17 changes: 17 additions & 0 deletions src/openqdc/datasets/iso_17.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@


class ISO17(BaseDataset):
"""
ISO17 dataset consists of the largest set of isomers from the QM9 dataset that consists of a fixed
composition of atoms (C7O2H10) arranged in different chemically valid structures. It consists
of 129 molecules each containing 5,000 conformational geometries, energies and forces with a resolution
of 1 femtosecond in the molecular dynamics trajectories. The simulations were carried out using the
Perdew-Burke-Ernzerhof (PBE) functional and the Tkatchenko-Scheffler (TS) van der Waals correction method.
Usage:
```python
from openqdc.datasets import ISO17
dataset = ISO17()
```
References:
- https://paperswithcode.com/dataset/iso17
"""

__name__ = "iso_17"

# Energy in hartree, all zeros by default
Expand Down
19 changes: 18 additions & 1 deletion src/openqdc/datasets/nabladft.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from os.path import join as p_join
from typing import Dict

import datamol as dm
import numpy as np
Expand All @@ -10,7 +11,7 @@
from openqdc.utils.constants import MAX_ATOMIC_NUMBER


def to_mol(entry):
def to_mol(entry) -> Dict[str, np.ndarray]:
Z, R, E, F = entry[:4]
C = np.zeros_like(Z)

Expand All @@ -37,6 +38,22 @@ def read_chunk_from_db(raw_path, start_idx, stop_idx, step_size=1000):


class NablaDFT(BaseDataset):
"""
NablaDFT is a dataset constructed from a subset of the
[Molecular Sets (MOSES) dataset](https://github.com/molecularsets/moses) consisting of 1 million molecules
with 5,340,152 unique conformations generated using ωB97X-D/def2-SVP level of theory.
Usage:
```python
from openqdc.datasets import NablaDFT
dataset = NablaDFT()
```
References:
- https://pubs.rsc.org/en/content/articlelanding/2022/CP/D2CP03966D
- https://github.com/AIRI-Institute/nablaDFT
"""

__name__ = "nabladft"
__energy_methods__ = ["wb97x-d_svp"]

Expand Down
17 changes: 17 additions & 0 deletions src/openqdc/datasets/spice.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,25 @@ def read_record(r):


class Spice(BaseDataset):
"""
Spice Dataset consists of 1.1 million conformations for a diverse set of 19k unique molecules consisting of
small molecules, dimers, dipeptides, and solvated amino acids. It consists of both forces and energies calculated
at the ωB97M-D3(BJ)/def2-TZVPPD level of theory.
Usage:
```python
from openqdc.datasets import Spice
dataset = Spice()
```
References:
- https://arxiv.org/abs/2209.10702
- https://github.com/openmm/spice-dataset
"""

__name__ = "spice"
__energy_methods__ = ["wb97x_tz"]
__force_methods__ = ["wb97x_tz"]

energy_target_names = ["dft_total_energy"]

Expand Down

0 comments on commit bf3c08a

Please sign in to comment.