Skip to content

Commit

Permalink
Merge branch 'spontaneous_symmetry_breaking' into refactor_oct19
Browse files Browse the repository at this point in the history
  • Loading branch information
rousseab committed Oct 19, 2024
2 parents 0aaa0c8 + 6fd1cd8 commit 9623653
Showing 7 changed files with 820 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -49,7 +49,7 @@ def compute_oracle_energies(self, batch_relative_coordinates: torch.Tensor) -> n

logger.info("Compute energy from Oracle")
with tempfile.TemporaryDirectory() as tmp_work_dir:
for positions, box in zip(batch_cartesian_positions.numpy(), batched_unit_cells.numpy()):
for positions, box in zip(batch_cartesian_positions.cpu().numpy(), batched_unit_cells.cpu().numpy()):
energy, forces = get_energy_and_forces_from_lammps(positions,
box,
self.atom_types,
78 changes: 78 additions & 0 deletions experiment_analysis/dataset_analysis/dataset_covariance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Effective Dataset Variance.
The goal of this script is to compute the effective "sigma_d" of the
actual datasets, that is, the standard deviation of the displacement
from equilibrium, in fractional coordinates.
"""
import logging

import einops
import torch
from tqdm import tqdm

from crystal_diffusion import ANALYSIS_RESULTS_DIR, DATA_DIR
from crystal_diffusion.data.diffusion.data_loader import (
LammpsForDiffusionDataModule, LammpsLoaderParameters)
from crystal_diffusion.utils.basis_transformations import \
map_relative_coordinates_to_unit_cell
from crystal_diffusion.utils.logging_utils import setup_analysis_logger

logger = logging.getLogger(__name__)
dataset_name = 'si_diffusion_2x2x2'
# dataset_name = 'si_diffusion_1x1x1'

output_dir = ANALYSIS_RESULTS_DIR / "covariances"
output_dir.mkdir(exist_ok=True)


if dataset_name == 'si_diffusion_1x1x1':
max_atom = 8
translation = torch.tensor([0.125, 0.125, 0.125])
elif dataset_name == 'si_diffusion_2x2x2':
max_atom = 64
translation = torch.tensor([0.0625, 0.0625, 0.0625])

lammps_run_dir = DATA_DIR / dataset_name
processed_dataset_dir = lammps_run_dir / "processed"

cache_dir = lammps_run_dir / "cache"

data_params = LammpsLoaderParameters(batch_size=2048, max_atom=max_atom)

if __name__ == '__main__':
setup_analysis_logger()
logger.info(f"Computing the covariance matrix for {dataset_name}")

datamodule = LammpsForDiffusionDataModule(
lammps_run_dir=lammps_run_dir,
processed_dataset_dir=processed_dataset_dir,
hyper_params=data_params,
working_cache_dir=cache_dir,
)
datamodule.setup()

train_dataset = datamodule.train_dataset

list_means = []
for batch in tqdm(datamodule.train_dataloader(), "Mean"):
x = map_relative_coordinates_to_unit_cell(batch['relative_coordinates'] + translation)
list_means.append(x.mean(dim=0))

# Drop the last batch, which might not have dimension batch_size
x0 = torch.stack(list_means[:-1]).mean(dim=0)

list_covariances = []
list_sizes = []
for batch in tqdm(datamodule.train_dataloader(), "displacements"):
x = map_relative_coordinates_to_unit_cell(batch['relative_coordinates'] + translation)
list_sizes.append(x.shape[0])
displacements = einops.rearrange(x - x0, "batch natoms space -> batch (natoms space)")
covariance = (displacements[:, None, :] * displacements[:, :, None]).sum(dim=0)
list_covariances.append(covariance)

covariance = torch.stack(list_covariances).sum(dim=0) / sum(list_sizes)

output_file = output_dir / f"covariance_{dataset_name}.pkl"
logger.info(f"Writing to file {output_file}...")
with open(output_file, 'wb') as fd:
torch.save(covariance, fd)
86 changes: 86 additions & 0 deletions experiment_analysis/dataset_analysis/plot_si_phonon_DOS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Silicon phonon Density of States.
The displacement covariance is related to the phonon dynamical matrix.
Here we extract the corresponding phonon density of state, based on this covariance,
to see if the energy scales match up.
"""
import matplotlib.pyplot as plt
import torch

from crystal_diffusion import ANALYSIS_RESULTS_DIR
from crystal_diffusion.analysis import PLEASANT_FIG_SIZE, PLOT_STYLE_PATH

plt.style.use(PLOT_STYLE_PATH)

# Define some constants.
kelvin_in_Ha = 0.000_003_166_78
T_in_kelvin = 300.0
bohr_in_angst = 0.529177
Si_mass = 28.0855
proton_mass = 1836.152673426

Ha_in_meV = 27211.0

THz_in_meV = 4.136

acell = 5.43


dataset_name_2x2x2 = "si_diffusion_2x2x2"
dataset_name_1x1x1 = "si_diffusion_1x1x1"

output_dir = ANALYSIS_RESULTS_DIR / "covariances"
output_dir.mkdir(exist_ok=True)


if __name__ == "__main__":
kBT = kelvin_in_Ha * T_in_kelvin
a = acell / bohr_in_angst

M = Si_mass * proton_mass

constant_1x1x1 = M * a**2 / kBT / Ha_in_meV**2
constant_2x2x2 = M * (2.0 * a) ** 2 / kBT / Ha_in_meV**2

covariance_file_1x1x1 = output_dir / f"covariance_{dataset_name_1x1x1}.pkl"
sigma_1x1x1 = torch.load(covariance_file_1x1x1)
sigma_inv_1x1x1 = torch.linalg.pinv(sigma_1x1x1)
Omega_1x1x1 = sigma_inv_1x1x1 / constant_1x1x1
omega2_1x1x1 = torch.linalg.eigvalsh(Omega_1x1x1)
list_omega_in_meV_1x1x1 = torch.sqrt(torch.abs(omega2_1x1x1))

covariance_file_2x2x2 = output_dir / f"covariance_{dataset_name_2x2x2}.pkl"
sigma_2x2x2 = torch.load(covariance_file_2x2x2)
sigma_inv_2x2x2 = torch.linalg.pinv(sigma_2x2x2)
Omega_2x2x2 = sigma_inv_2x2x2 / constant_2x2x2
omega2_2x2x2 = torch.linalg.eigvalsh(Omega_2x2x2)
list_omega_in_meV_2x2x2 = torch.sqrt(torch.abs(omega2_2x2x2))

max_hw = torch.max(list_omega_in_meV_2x2x2) / THz_in_meV

bins = torch.linspace(0.0, max_hw, 50)

fig = plt.figure(figsize=PLEASANT_FIG_SIZE)
fig.suptitle("Eigenvalues of Dynamical Matrix, from Displacement Covariance")
ax = fig.add_subplot(111)

ax.set_xlim(0, max_hw + 1)
ax.set_xlabel(r"$\hbar \omega$ (THz)")
ax.set_ylabel("Count")

ax.hist(
list_omega_in_meV_1x1x1 / THz_in_meV,
bins=bins,
label="Si 1x1x1",
color="green",
alpha=0.5,
)
ax.hist(
list_omega_in_meV_2x2x2 / THz_in_meV,
bins=bins,
label="Si 2x2x2",
color="blue",
alpha=0.25,
)
ax.legend(loc=0)
plt.show()
Loading

0 comments on commit 9623653

Please sign in to comment.