Merge branch 'spontaneous_symmetry_breaking' into refactor_oct19

mila-iqia · Oct 19, 2024 · 9623653 · 9623653
2 parents 0aaa0c8 + 6fd1cd8
commit 9623653
Showing 7 changed files with 820 additions and 1 deletion.
diff --git a/...iffusion/analysis/analytic_score/exploring_langevin_generator/generate_sample_energies.py b/...iffusion/analysis/analytic_score/exploring_langevin_generator/generate_sample_energies.py
@@ -49,7 +49,7 @@ def compute_oracle_energies(self, batch_relative_coordinates: torch.Tensor) -> n
 
         logger.info("Compute energy from Oracle")
         with tempfile.TemporaryDirectory() as tmp_work_dir:
-            for positions, box in zip(batch_cartesian_positions.numpy(), batched_unit_cells.numpy()):
+            for positions, box in zip(batch_cartesian_positions.cpu().numpy(), batched_unit_cells.cpu().numpy()):
                 energy, forces = get_energy_and_forces_from_lammps(positions,
                                                                    box,
                                                                    self.atom_types,

diff --git a/experiment_analysis/dataset_analysis/dataset_covariance.py b/experiment_analysis/dataset_analysis/dataset_covariance.py
@@ -0,0 +1,78 @@
+"""Effective Dataset Variance.
+
+The goal of this script is to compute the effective "sigma_d" of the
+actual datasets, that is, the standard deviation of the displacement
+from equilibrium, in fractional coordinates.
+"""
+import logging
+
+import einops
+import torch
+from tqdm import tqdm
+
+from crystal_diffusion import ANALYSIS_RESULTS_DIR, DATA_DIR
+from crystal_diffusion.data.diffusion.data_loader import (
+    LammpsForDiffusionDataModule, LammpsLoaderParameters)
+from crystal_diffusion.utils.basis_transformations import \
+    map_relative_coordinates_to_unit_cell
+from crystal_diffusion.utils.logging_utils import setup_analysis_logger
+
+logger = logging.getLogger(__name__)
+dataset_name = 'si_diffusion_2x2x2'
+# dataset_name = 'si_diffusion_1x1x1'
+
+output_dir = ANALYSIS_RESULTS_DIR / "covariances"
+output_dir.mkdir(exist_ok=True)
+
+
+if dataset_name == 'si_diffusion_1x1x1':
+    max_atom = 8
+    translation = torch.tensor([0.125, 0.125, 0.125])
+elif dataset_name == 'si_diffusion_2x2x2':
+    max_atom = 64
+    translation = torch.tensor([0.0625, 0.0625, 0.0625])
+
+lammps_run_dir = DATA_DIR / dataset_name
+processed_dataset_dir = lammps_run_dir / "processed"
+
+cache_dir = lammps_run_dir / "cache"
+
+data_params = LammpsLoaderParameters(batch_size=2048, max_atom=max_atom)
+
+if __name__ == '__main__':
+    setup_analysis_logger()
+    logger.info(f"Computing the covariance matrix for {dataset_name}")
+
+    datamodule = LammpsForDiffusionDataModule(
+        lammps_run_dir=lammps_run_dir,
+        processed_dataset_dir=processed_dataset_dir,
+        hyper_params=data_params,
+        working_cache_dir=cache_dir,
+    )
+    datamodule.setup()
+
+    train_dataset = datamodule.train_dataset
+
+    list_means = []
+    for batch in tqdm(datamodule.train_dataloader(), "Mean"):
+        x = map_relative_coordinates_to_unit_cell(batch['relative_coordinates'] + translation)
+        list_means.append(x.mean(dim=0))
+
+    # Drop the last batch, which might not have dimension batch_size
+    x0 = torch.stack(list_means[:-1]).mean(dim=0)
+
+    list_covariances = []
+    list_sizes = []
+    for batch in tqdm(datamodule.train_dataloader(), "displacements"):
+        x = map_relative_coordinates_to_unit_cell(batch['relative_coordinates'] + translation)
+        list_sizes.append(x.shape[0])
+        displacements = einops.rearrange(x - x0, "batch natoms space -> batch (natoms space)")
+        covariance = (displacements[:, None, :] * displacements[:, :, None]).sum(dim=0)
+        list_covariances.append(covariance)
+
+    covariance = torch.stack(list_covariances).sum(dim=0) / sum(list_sizes)
+
+    output_file = output_dir / f"covariance_{dataset_name}.pkl"
+    logger.info(f"Writing to file {output_file}...")
+    with open(output_file, 'wb') as fd:
+        torch.save(covariance, fd)
diff --git a/experiment_analysis/dataset_analysis/plot_si_phonon_DOS.py b/experiment_analysis/dataset_analysis/plot_si_phonon_DOS.py
@@ -0,0 +1,86 @@
+"""Silicon phonon Density of States.
+
+The displacement covariance is related to the phonon dynamical matrix.
+Here we extract the corresponding phonon density of state, based on this covariance,
+to see if the energy scales match up.
+"""
+import matplotlib.pyplot as plt
+import torch
+
+from crystal_diffusion import ANALYSIS_RESULTS_DIR
+from crystal_diffusion.analysis import PLEASANT_FIG_SIZE, PLOT_STYLE_PATH
+
+plt.style.use(PLOT_STYLE_PATH)
+
+# Define some constants.
+kelvin_in_Ha = 0.000_003_166_78
+T_in_kelvin = 300.0
+bohr_in_angst = 0.529177
+Si_mass = 28.0855
+proton_mass = 1836.152673426
+
+Ha_in_meV = 27211.0
+
+THz_in_meV = 4.136
+
+acell = 5.43
+
+
+dataset_name_2x2x2 = "si_diffusion_2x2x2"
+dataset_name_1x1x1 = "si_diffusion_1x1x1"
+
+output_dir = ANALYSIS_RESULTS_DIR / "covariances"
+output_dir.mkdir(exist_ok=True)
+
+
+if __name__ == "__main__":
+    kBT = kelvin_in_Ha * T_in_kelvin
+    a = acell / bohr_in_angst
+
+    M = Si_mass * proton_mass
+
+    constant_1x1x1 = M * a**2 / kBT / Ha_in_meV**2
+    constant_2x2x2 = M * (2.0 * a) ** 2 / kBT / Ha_in_meV**2
+
+    covariance_file_1x1x1 = output_dir / f"covariance_{dataset_name_1x1x1}.pkl"
+    sigma_1x1x1 = torch.load(covariance_file_1x1x1)
+    sigma_inv_1x1x1 = torch.linalg.pinv(sigma_1x1x1)
+    Omega_1x1x1 = sigma_inv_1x1x1 / constant_1x1x1
+    omega2_1x1x1 = torch.linalg.eigvalsh(Omega_1x1x1)
+    list_omega_in_meV_1x1x1 = torch.sqrt(torch.abs(omega2_1x1x1))
+
+    covariance_file_2x2x2 = output_dir / f"covariance_{dataset_name_2x2x2}.pkl"
+    sigma_2x2x2 = torch.load(covariance_file_2x2x2)
+    sigma_inv_2x2x2 = torch.linalg.pinv(sigma_2x2x2)
+    Omega_2x2x2 = sigma_inv_2x2x2 / constant_2x2x2
+    omega2_2x2x2 = torch.linalg.eigvalsh(Omega_2x2x2)
+    list_omega_in_meV_2x2x2 = torch.sqrt(torch.abs(omega2_2x2x2))
+
+    max_hw = torch.max(list_omega_in_meV_2x2x2) / THz_in_meV
+
+    bins = torch.linspace(0.0, max_hw, 50)
+
+    fig = plt.figure(figsize=PLEASANT_FIG_SIZE)
+    fig.suptitle("Eigenvalues of Dynamical Matrix, from Displacement Covariance")
+    ax = fig.add_subplot(111)
+
+    ax.set_xlim(0, max_hw + 1)
+    ax.set_xlabel(r"$\hbar \omega$ (THz)")
+    ax.set_ylabel("Count")
+
+    ax.hist(
+        list_omega_in_meV_1x1x1 / THz_in_meV,
+        bins=bins,
+        label="Si 1x1x1",
+        color="green",
+        alpha=0.5,
+    )
+    ax.hist(
+        list_omega_in_meV_2x2x2 / THz_in_meV,
+        bins=bins,
+        label="Si 2x2x2",
+        color="blue",
+        alpha=0.25,
+    )
+    ax.legend(loc=0)
+    plt.show()