Updated docs for spice, iso17, nabladft

valence-labs · Oct 5, 2023 · bf3c08a · bf3c08a
1 parent 99a3506
commit bf3c08a
Show file tree

Hide file tree

Showing 4 changed files with 77 additions and 1 deletion.
diff --git a/src/openqdc/datasets/__init__.py b/src/openqdc/datasets/__init__.py
@@ -0,0 +1,25 @@
+from .comp6 import COMP6
+from .gdml import GDML
+from .geom import GEOM
+from .iso_17 import ISO17
+from .molecule3d import Molecule3D
+from .nabladft import NablaDFT
+from .orbnet_denali import OrbnetDenali
+from .qmugs import QMugs
+from .sn2_rxn import SN2RXN
+from .spice import Spice
+
+# Public API of openqdc.datasets: exactly one entry per class imported above,
+# listed in the same order as the imports so omissions and duplicates stand out.
+__all__ = [
+    "COMP6",
+    "GDML",
+    "GEOM",
+    "ISO17",
+    "Molecule3D",
+    "NablaDFT",
+    "OrbnetDenali",
+    "QMugs",
+    "SN2RXN",
+    "Spice",
+]
diff --git a/src/openqdc/datasets/iso_17.py b/src/openqdc/datasets/iso_17.py
@@ -7,6 +7,23 @@
 
 
 class ISO17(BaseDataset):
+    """
+    ISO17 dataset consists of the largest set of isomers from the QM9 dataset that consists of a fixed
+    composition of atoms (C7O2H10) arranged in different chemically valid structures. It consists of 129
+    molecules, each containing 5,000 conformational geometries, energies and forces with a resolution
+    of 1 femtosecond in the molecular dynamics trajectories. The simulations were carried out using the
+    Perdew-Burke-Ernzerhof (PBE) functional and the Tkatchenko-Scheffler (TS) van der Waals correction method.
+
+    Usage:
+    ```python
+    from openqdc.datasets import ISO17
+    dataset = ISO17()
+    ```
+
+    References:
+    - https://paperswithcode.com/dataset/iso17
+    """
+
     __name__ = "iso_17"
 
     # Energy in hartree, all zeros by default

diff --git a/src/openqdc/datasets/nabladft.py b/src/openqdc/datasets/nabladft.py
@@ -1,5 +1,6 @@
 import os
 from os.path import join as p_join
+from typing import Dict
 
 import datamol as dm
 import numpy as np
@@ -10,7 +11,7 @@
 from openqdc.utils.constants import MAX_ATOMIC_NUMBER
 
 
-def to_mol(entry):
+def to_mol(entry) -> Dict[str, np.ndarray]:
     Z, R, E, F = entry[:4]
     C = np.zeros_like(Z)
 
@@ -37,6 +38,22 @@ def read_chunk_from_db(raw_path, start_idx, stop_idx, step_size=1000):
 
 
 class NablaDFT(BaseDataset):
+    """
+    NablaDFT is a dataset constructed from a subset of the
+    [Molecular Sets (MOSES) dataset](https://github.com/molecularsets/moses) consisting of 1 million molecules
+    with 5,340,152 unique conformations generated using ωB97X-D/def2-SVP level of theory.
+
+    Usage:
+    ```python
+    from openqdc.datasets import NablaDFT
+    dataset = NablaDFT()
+    ```
+
+    References:
+    - https://pubs.rsc.org/en/content/articlelanding/2022/CP/D2CP03966D
+    - https://github.com/AIRI-Institute/nablaDFT
+    """
+
     __name__ = "nabladft"
     __energy_methods__ = ["wb97x-d_svp"]
 

diff --git a/src/openqdc/datasets/spice.py b/src/openqdc/datasets/spice.py
@@ -32,8 +32,25 @@ def read_record(r):
 
 
 class Spice(BaseDataset):
+    """
+    Spice Dataset consists of 1.1 million conformations for a diverse set of 19k unique molecules consisting of
+    small molecules, dimers, dipeptides, and solvated amino acids. It consists of both forces and energies calculated
+    at ωB97M-D3(BJ)/def2-TZVPPD level of theory.
+
+    Usage:
+    ```python
+    from openqdc.datasets import Spice
+    dataset = Spice()
+    ```
+
+    References:
+    - https://arxiv.org/abs/2209.10702
+    - https://github.com/openmm/spice-dataset
+    """
+
     __name__ = "spice"
     __energy_methods__ = ["wb97x_tz"]
+    __force_methods__ = ["wb97x_tz"]
 
     energy_target_names = ["dft_total_energy"]