adding samplers

khaledkah · Apr 3, 2024 · 89ba818 · 89ba818
1 parent 54c8a7e
commit 89ba818
Show file tree

Hide file tree

Showing 28 changed files with 1,608 additions and 1,072 deletions.
diff --git a/notebooks/denoising_tutorial.ipynb b/notebooks/denoising_tutorial.ipynb
diff --git a/notebooks/diffusion_tutorial.ipynb b/notebooks/diffusion_tutorial.ipynb
diff --git a/src/morered/__init__.py b/src/morered/__init__.py
@@ -5,6 +5,8 @@
     noise_schedules,
     optimization,
     transform,
+    sampling,
+    callbacks,
     utils,
 )
 from morered.task import *
diff --git a/src/morered/callbacks.py b/src/morered/callbacks.py
@@ -100,7 +100,8 @@ class SamplerCallback(Callback):
     def __init__(
         self,
         sampler: Sampler,
-        t: Optional[Union[int, torch.Tensor]] = None,
+        t: Optional[int] = None,
+        max_steps: Optional[int] = None,
         sample_prior: bool = True,
         name: str = "sampling",
         store_path: str = "samples",
@@ -115,6 +116,7 @@ def __init__(
         Args:
             sampler: sampler to be used for sampling/denoising.
             t: time step to start denoising from. Defaults to None (start from the prior).
+            max_steps: maximum number of reverse steps when using MoreRed.
             sample_prior: whether to sample from the prior or use input as start sample.
             name: name of the callback.
             store_path: path to store the results and samples.
@@ -129,6 +131,7 @@ def __init__(
         super().__init__()
         self.sampler = sampler
         self.t = t
+        self.max_steps = max_steps
         self.sample_prior = sample_prior
         self.name = name
         self.store_path = store_path
@@ -139,9 +142,6 @@ def __init__(
         self.log_validity = log_validity
         self.bonds_data = generate_bonds_data(bonds_data_path)
 
-        if isinstance(self.t, int):
-            self.t = torch.tensor([self.t])
-
         if not os.path.exists(self.store_path):
             os.makedirs(self.store_path)
 
@@ -158,9 +158,14 @@ def sample(
         # update the sampling model
         self.sampler.update_model(model)
 
+        # sample from the prior
+        if self.sample_prior:
+            x_t = self.sampler.sample_prior(batch, self.t)
+            batch.update(x_t)
+
         # sample / denoise
         samples, num_steps, hist = self.sampler(
-            batch, self.t, self.sample_prior  # type: ignore
+            batch, t=self.t, max_steps=self.max_steps
         )
 
         # add important properties to save along with the sampled ones
@@ -186,9 +191,9 @@ def sample(
         results = {
             "samples": samples,
             "hist": hist,
-            "num_steps": num_steps.cpu()
-            if isinstance(num_steps, torch.Tensor)
-            else num_steps,
+            "num_steps": (
+                num_steps.cpu() if isinstance(num_steps, torch.Tensor) else num_steps
+            ),
             "t": self.t.cpu() if isinstance(self.t, torch.Tensor) else self.t,
         }
 
@@ -255,14 +260,14 @@ def _step(
             connected = np.array(validity_res["connected"])
             connected_wo_h = np.array(validity_res["connected_wo_h"])
             results["bonds"] = validity_res["bonds"]
-            results["connectivity"] = torch.from_numpy(connected).to("cpu")
-            results["stable_atoms"] = torch.from_numpy(stable_ats).to("cpu")
-            results["stable_molecules"] = torch.from_numpy(stable_mols).to("cpu")
-            results["stable_atoms_wo_h"] = torch.from_numpy(stable_ats_wo_h).to("cpu")
+            results["connectivity"] = torch.from_numpy(connected).cpu()
+            results["stable_atoms"] = torch.from_numpy(stable_ats).cpu()
+            results["stable_molecules"] = torch.from_numpy(stable_mols).cpu()
+            results["stable_atoms_wo_h"] = torch.from_numpy(stable_ats_wo_h).cpu()
             results["stable_molecules_wo_h"] = torch.from_numpy(stable_mols_wo_h).to(
                 "cpu"
             )
-            results["connectivity_wo_h"] = torch.from_numpy(connected_wo_h).to("cpu")
+            results["connectivity_wo_h"] = torch.from_numpy(connected_wo_h).cpu()
 
             # infer metrics from validity results
             metrics = {
@@ -295,7 +300,7 @@ def _step(
                 else batch[properties.R]
             )
 
-            res_rmsd = batch_rmsd(reference_R, results["samples"]).to("cpu")
+            res_rmsd = batch_rmsd(reference_R, results["samples"]).cpu()
 
             results["rmsd"] = res_rmsd
             metrics["rmsd"] = res_rmsd.mean()

diff --git a/src/morered/configs/callbacks/sampling.yaml b/src/morered/configs/callbacks/sampling.yaml
@@ -0,0 +1,15 @@
+sampling:
+    _target_: morered.callbacks.SamplerCallback
+    sampler: ${sampler}
+    name: sampling
+    t: ???
+    max_steps: ???
+    sample_prior: True
+    store_path: samples
+    every_n_batchs: 1
+    every_n_epochs: 200
+    start_epoch: 1
+    log_rmsd: False
+    log_validity: True
+    bonds_data_path: null
+
diff --git a/src/morered/configs/experiment/vp_gauss_ddpm _qm9.yaml b/src/morered/configs/experiment/vp_gauss_ddpm _qm9.yaml
@@ -0,0 +1,5 @@
+# @package _global_
+
+defaults:
+  - vp_gauss_ddpm
+  - override /data: qm9_filtered
diff --git a/.../configs/experiment/vp_gauss_ddpm_jt.yaml → ...red/configs/experiment/vp_gauss_ddpm.yaml b/.../configs/experiment/vp_gauss_ddpm_jt.yaml → ...red/configs/experiment/vp_gauss_ddpm.yaml
@@ -4,9 +4,10 @@ defaults:
   - override /model: nnp
   - override /data: qm7x
   - override /task: diffusion_task
+  - override /sampler: ddpm
 
 run:
-  experiment: vp_gauss_ddpm_jt
+  experiment: vp_gauss_ddpm
 
 globals:
   cutoff: 5.
@@ -15,7 +16,6 @@ globals:
   noise_target_key: eps
   noise_output_key: eps_pred
   time_target_key: t
-  time_output_key: t_pred
 
   noise_schedule:
     _target_: morered.noise_schedules.PolynomialSchedule
@@ -46,7 +46,6 @@ data:
     - _target_: morered.transform.Diffuse
       diffuse_property: _positions
       diffusion_process: ${globals.diffusion_process}
-      T: ${globals.noise_schedule.T}
       time_key: ${globals.time_target_key}
 
     - _target_: schnetpack.transform.MatScipyNeighborList
@@ -61,22 +60,15 @@ model:
       cutoff: ${globals.cutoff}
     n_atom_basis: ${globals.n_atom_basis}
   output_modules:
-    - _target_: morered.model.heads.DiffusionTime
-      n_in: ${globals.n_atom_basis}
-      n_hidden: null
-      n_layers: 3
-      output_key: ${globals.time_output_key}
-      aggregation_mode: null
-      detach_representation: False
     - _target_: morered.model.heads.TimeAwareEquivariant
       n_in: ${globals.n_atom_basis}
       n_hidden: null
       n_layers: 3
       output_key: ${globals.noise_output_key}
       include_time: True
-      time_head: ${model.output_modules.0}
+      time_head: null
       detach_time_head: False
-      time_key: ${globals.time_output_key}
+      time_key: ${globals.time_target_key}
   do_postprocessing: True
   postprocessors:
     - _target_: morered.transform.BatchSubtractCenterOfMass
@@ -88,16 +80,6 @@ task:
   skip_exploding_batches: True
   include_l0: False
   outputs:
-    - _target_: schnetpack.task.ModelOutput
-      name: ${globals.time_output_key}
-      target_property: ${globals.time_target_key}
-      loss_fn:
-        _target_: torch.nn.MSELoss
-      metrics:
-        mse:
-          _target_: torchmetrics.regression.MeanSquaredError
-          squared: True
-      loss_weight: 0.1
     - _target_: morered.task.DiffModelOutput
       name: ${globals.noise_output_key}
       target_property: ${globals.noise_target_key}
@@ -107,7 +89,7 @@ task:
         mse:
           _target_: torchmetrics.regression.MeanSquaredError
           squared: True
-      loss_weight: 0.9
+      loss_weight: 1.0
       nll_metric: null
         # _target_: morered.optimization.metrics.NLL
         # noise_schedule: ${globals.noise_schedule}
@@ -118,3 +100,11 @@ task:
         # time_key: ${globals.time_target_key}
         # noise_key: ${globals.noise_target_key}
         # noise_pred_key: ${globals.noise_output_key}
+
+sampler:
+  denoiser: null
+
+callbacks:
+  sampling:
+    t: null
+    max_steps: null
diff --git a/src/morered/configs/experiment/vp_gauss_morered_jt.yaml b/src/morered/configs/experiment/vp_gauss_morered_jt.yaml
@@ -0,0 +1,77 @@
+# @package _global_
+
+defaults:
+  - vp_gauss_ddpm
+  - override /sampler: morered_jt
+
+run:
+  experiment: vp_gauss_morered_jt
+
+globals:
+  time_output_key: t_pred
+
+model:
+  output_modules:
+    - _target_: morered.model.heads.DiffusionTime
+      n_in: ${globals.n_atom_basis}
+      n_hidden: null
+      n_layers: 3
+      output_key: ${globals.time_output_key}
+      aggregation_mode: null
+      detach_representation: False
+    - _target_: morered.model.heads.TimeAwareEquivariant
+      n_in: ${globals.n_atom_basis}
+      n_hidden: null
+      n_layers: 3
+      output_key: ${globals.noise_output_key}
+      include_time: True
+      time_head: ${model.output_modules.0}
+      detach_time_head: False
+      time_key: ${globals.time_output_key}
+
+task:
+  outputs:
+    - _target_: schnetpack.task.ModelOutput
+      name: ${globals.time_output_key}
+      target_property: ${globals.time_target_key}
+      loss_fn:
+        _target_: torch.nn.MSELoss
+      metrics:
+        mse:
+          _target_: torchmetrics.regression.MeanSquaredError
+          squared: True
+      loss_weight: 0.1
+    - _target_: morered.task.DiffModelOutput
+      name: ${globals.noise_output_key}
+      target_property: ${globals.noise_target_key}
+      loss_fn:
+        _target_: torch.nn.MSELoss
+      metrics:
+        mse:
+          _target_: torchmetrics.regression.MeanSquaredError
+          squared: True
+      loss_weight: 0.9
+      nll_metric: null
+
+sampler:
+  denoiser: null
+
+callbacks:
+  sampling:
+    t: null
+    max_steps: 2000
+
+  # denoising:
+  #   _target_: morered.callbacks.SamplerCallback
+  #   sampler: ${sampler}
+  #   name: denoising
+  #   t: 150
+  #   max_steps: 1000
+  #   sample_prior: True
+  #   store_path: denoised
+  #   every_n_batchs: 1
+  #   every_n_epochs: 200
+  #   start_epoch: 1
+  #   log_rmsd: True
+  #   log_validity: True
+  #   bonds_data_path: null
diff --git a/src/morered/configs/experiment/vp_gauss_morered_jt_qm9.yaml b/src/morered/configs/experiment/vp_gauss_morered_jt_qm9.yaml
@@ -0,0 +1,5 @@
+# @package _global_
+
+defaults:
+  - vp_gauss_morered_jt
+  - override /data: qm9_filtered
diff --git a/src/morered/configs/experiment/vp_gauss_time_predictor.yaml b/src/morered/configs/experiment/vp_gauss_time_predictor.yaml
@@ -0,0 +1,42 @@
+# @package _global_
+
+defaults:
+  - vp_gauss_ddpm
+  - override /callbacks:
+    - checkpoint
+    - earlystopping
+    - lrmonitor
+    - ema
+
+run:
+  experiment: vp_gauss_time_predictor
+
+globals:
+  time_output_key: t_pred
+
+model:
+  output_modules:
+    - _target_: morered.model.heads.DiffusionTime
+      n_in: ${globals.n_atom_basis}
+      n_hidden: null
+      n_layers: 3
+      output_key: ${globals.time_output_key}
+      aggregation_mode: null
+      detach_representation: False
+  postprocessors:
+    - _target_: schnetpack.transform.CastTo64
+
+task:
+  outputs:
+    - _target_: schnetpack.task.ModelOutput
+      name: ${globals.time_output_key}
+      target_property: ${globals.time_target_key}
+      loss_fn:
+        _target_: torch.nn.MSELoss
+      metrics:
+        mse:
+          _target_: torchmetrics.regression.MeanSquaredError
+          squared: True
+      loss_weight: 1.0
+
+sampler: null
diff --git a/src/morered/configs/experiment/vp_gauss_time_predictor_qm9.yaml b/src/morered/configs/experiment/vp_gauss_time_predictor_qm9.yaml
@@ -0,0 +1,5 @@
+# @package _global_
+
+defaults:
+  - vp_gauss_time_predictor
+  - override /data: qm9_filtered
diff --git a/src/morered/configs/sampler/ddpm.yaml b/src/morered/configs/sampler/ddpm.yaml
@@ -0,0 +1,11 @@
+_target_: morered.sampling.DDPM
+diffusion_process: ${globals.diffusion_process}
+denoiser: ???
+time_key: ${globals.time_target_key}
+noise_pred_key: ${globals.noise_output_key}
+cutoff: ${globals.cutoff}
+recompute_neighbors: False
+save_progress: False
+progress_stride: 1
+results_on_cpu: True
+device: null
diff --git a/src/morered/configs/sampler/morered.yaml b/src/morered/configs/sampler/morered.yaml
@@ -0,0 +1,13 @@
+_target_: morered.sampling.MoreRed
+diffusion_process: ${globals.diffusion_process}
+denoiser: ???
+time_key: ${globals.time_target_key}
+noise_pred_key: ${globals.noise_output_key}
+time_pred_key: ${globals.time_output_key}
+convergence_step: 0
+cutoff: ${globals.cutoff}
+recompute_neighbors: False
+save_progress: False
+progress_stride: 1
+results_on_cpu: True
+device: null
diff --git a/src/morered/configs/sampler/morered_as.yaml b/src/morered/configs/sampler/morered_as.yaml
@@ -0,0 +1,5 @@
+defaults:
+  - morered
+
+_target_: morered.sampling.MoreRedAS
+time_predictor: ???
diff --git a/src/morered/configs/sampler/morered_itp.yaml b/src/morered/configs/sampler/morered_itp.yaml
@@ -0,0 +1,4 @@
+defaults:
+  - morered_as
+
+_target_: morered.sampling.MoreRedITP