-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from mila-iqia/lammps_output_preprocess
Lammps output preprocess
- Loading branch information
Showing
8 changed files
with
242 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import argparse | ||
import os | ||
from collections import defaultdict | ||
|
||
import pandas as pd | ||
import yaml | ||
|
||
|
||
def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: str):
    """Parse LAMMPS yaml outputs and save them as a single parquet file.

    Every MD step found in the dump file becomes one row of the output table. The per-atom columns
    (id, type, x, y, z, fx, fy, fz, when present in the dump) hold one list per row, and the 'energy'
    column holds the sum of the KinEng and PotEng values read from the thermo log.

    Args:
        lammps_dump: LAMMPS dump file in yaml format, with one yaml document per MD step
        lammps_thermo_log: LAMMPS thermodynamic variables output file in yaml format
        output_name: name of the parquet file written by the script. The '.parquet' extension is
            appended when missing.

    Raises:
        ValueError: if an input file does not exist, if a dump step has no 'id' keyword, or if the
            columns collected from the dump and thermo files do not all have the same number of MD steps.
    """
    if not os.path.exists(lammps_dump):
        raise ValueError(f'{lammps_dump} does not exist. Please provide a valid LAMMPS dump file as yaml.')

    if not os.path.exists(lammps_thermo_log):
        raise ValueError(f'{lammps_thermo_log} does not exist. Please provide a valid LAMMPS thermo log file as yaml.')

    # per-atom quantities kept from the dump file; any other keyword is ignored
    atom_fields = frozenset(('id', 'type', 'x', 'y', 'z', 'fx', 'fy', 'fz'))

    # column name -> one entry per MD step
    pd_data = defaultdict(list)

    # get the atom information (positions and forces) from the LAMMPS 'dump' file
    with open(lammps_dump, 'r') as f:
        # every MD iteration is saved as a separate document in the yaml file
        for doc in yaml.safe_load_all(f):  # loop over MD steps
            if doc is None:  # an empty yaml document carries no MD step
                continue
            keywords = doc['keywords']
            if 'id' not in keywords:  # sanity check
                raise ValueError('id should be in LAMMPS dump file')
            atoms_info = defaultdict(list)  # positions and forces of the atoms for this MD step
            for atom_row in doc['data']:  # loop over the atoms
                for key, value in zip(keywords, atom_row):
                    if key in atom_fields:
                        atoms_info[key].append(value)
            # add the information about that MD step to the table
            for key, values in atoms_info.items():
                pd_data[key].append(values)

    # get the total energy from the LAMMPS thermo output
    with open(lammps_thermo_log, 'r') as f:
        log_yaml = yaml.safe_load(f)
    kin_idx = log_yaml['keywords'].index('KinEng')
    pot_idx = log_yaml['keywords'].index('PotEng')
    pd_data['energy'] = [row[kin_idx] + row[pot_idx] for row in log_yaml['data']]

    # fail with an explicit message instead of an opaque pandas error when the files disagree
    step_counts = {column: len(values) for column, values in pd_data.items()}
    if len(set(step_counts.values())) > 1:
        raise ValueError(
            f'Inconsistent number of MD steps between {lammps_dump} and {lammps_thermo_log}: {step_counts}'
        )

    if not output_name.endswith('.parquet'):
        output_name += '.parquet'

    pd.DataFrame(pd_data).to_parquet(output_name, engine='pyarrow', index=False)
|
||
|
||
def main():
    """Parse the command line arguments and convert LAMMPS outputs into a single parquet file.

    All three arguments are required: a missing one makes argparse exit with a usage message
    instead of passing None to parse_lammps_output.
    """
    parser = argparse.ArgumentParser(description="Convert LAMMPS outputs in parquet file compatible with a dataloader.")
    parser.add_argument("--dump_file", type=str, required=True, help="LAMMPS dump file in yaml format.")
    parser.add_argument("--thermo_file", type=str, required=True, help="LAMMPS thermo output file in yaml format.")
    parser.add_argument("--output_name", type=str, required=True, help="Output name")
    args = parser.parse_args()

    parse_lammps_output(args.dump_file, args.thermo_file, args.output_name)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# ----- logging -----
# send the log of this run to a file named after the temperature T and box size S
log             log.si-${T}-${S}.lammps

# ----- general settings -----
units           metal
atom_style      atomic
atom_modify     map array

# ----- simulation box: diamond lattice (constant 5.43) replicated S times per direction -----
lattice         diamond 5.43
region          simbox block 0 ${S} 0 ${S} 0 ${S}
create_box      1 simbox
create_atoms    1 region simbox

#read_dump ${DUMP} ${STEP} x y z vx vy vz fx fy fz box yes replace no purge yes add yes

# ----- atoms: a single type, named Si -----
mass            1 28.0855
group           Si type 1

# ----- interactions: sw pair style with the parameters stored in si.sw -----
pair_style      sw
pair_coeff      * * si.sw Si

# ----- initial velocities created at temperature T -----
velocity        all create ${T} 62177

#==========================Output files========================
# per-atom ids, types, positions and forces, written at every step in yaml format
dump            1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz

# thermodynamic output in yaml format, printed at every step
thermo_style    yaml
thermo          1

# ----- dynamics: NVT at temperature T for STEP steps -----
fix             1 all nvt temp ${T} ${T} 0.01
run             ${STEP}
unfix           1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash

# Convert the LAMMPS dump and thermo yaml files of one experiment into a single parquet file.

# no trailing slash here: the paths below add the separator themselves
EXP_DIR="lammps_scripts/Si/si-custom"
DUMP_FILENAME="dump.si-300-1.yaml"
THERMO_FILENAME="thermo_log.yaml"
OUTPUT_NAME="demo.parquet"

# expansions are quoted so that paths containing spaces stay single arguments
python crystal_diffusion/data/parse_lammps_outputs.py \
    --dump_file "${EXP_DIR}/${DUMP_FILENAME}" \
    --thermo_file "${EXP_DIR}/${THERMO_FILENAME}" \
    --output_name "${EXP_DIR}/${OUTPUT_NAME}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/bash

# Run the LAMMPS example and extract its thermodynamic output as yaml.

TEMPERATURE=300
BOX_SIZE=1

lmp < lammps_input_example.lammps -v STEP 10 -v T "$TEMPERATURE" -v S "$BOX_SIZE"

# NOTE(review): assumes lammps_input_example.lammps redirects logging with
# `log log.si-${T}-${S}.lammps`; the thermo output of the run is then in that file,
# not in the default log.lammps.
LOG_FILE="log.si-${TEMPERATURE}-${BOX_SIZE}.lammps"

# extract the thermodynamic outputs in a yaml file
# (yaml data rows start with two spaces followed by "- [")
grep -E '^(keywords:|data:$|---$|\.\.\.$|  - \[)' "$LOG_FILE" > log.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Stillinger-Weber parameters for various elements and mixtures | ||
# multiple entries can be added to this file, LAMMPS reads the ones it needs | ||
# these entries are in LAMMPS "metal" units: | ||
# epsilon = eV; sigma = Angstroms | ||
# other quantities are unitless | ||
|
||
# format of a single entry (one or more lines): | ||
# element 1, element 2, element 3, | ||
# epsilon, sigma, a, lambda, gamma, costheta0, A, B, p, q, tol | ||
|
||
# Here are the original parameters in metal units, for Silicon from: | ||
# | ||
# Stillinger and Weber, Phys. Rev. B, v. 31, p. 5262, (1985) | ||
# | ||
# Parameters for 'dia' Si | ||
Si Si Si 2.1683 2.0951 1.80 21.0 1.20 -0.333333333333 | ||
7.049556277 0.6022245584 4.0 0.0 0.0 | ||
# | ||
# Parameters for amorphous Si with the modified SW potential | ||
#(R. L. C. Vink, G. T. Barkema, W. F. van der Weg and N. Mousseau, A semi-empirical potential for amorphous silicon, J. Non-Cryst. Sol. 282, 248-255 (2001)) | ||
#Si Si Si 1.64833 2.0951 1.80 31.5 1.20 -0.333333333333 | ||
# 7.049556277 0.6022245584 4.0 0.0 0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
datasets==2.17.1 | ||
flake8==4.0.1 | ||
flake8-docstrings==1.6.0 | ||
gitpython==3.1.27 | ||
isort==5.13.2 | ||
jupyter==1.0.0 | ||
jinja2==3.1.2 | ||
myst-parser==2.0.0 | ||
orion>=0.2.4.post1 | ||
pyarrow==15.0.0 | ||
pyyaml==6.0 | ||
pytest==7.1.2 | ||
pytest-cov==3.0.0 | ||
pytorch_lightning>=2.2.0 | ||
pytype==2024.2.13 | ||
sphinx==7.2.6 | ||
sphinx-autoapi==3.0.0 | ||
sphinx-rtd-theme==2.0.0 | ||
sphinxcontrib-napoleon==0.7 | ||
sphinxcontrib-katex==0.8.6 | ||
tensorboard==2.16.2 | ||
tqdm==4.64.0 | ||
torch==2.2.0 | ||
torchvision>=0.17.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import os | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import pytest | ||
import yaml | ||
|
||
from crystal_diffusion.data.parse_lammps_outputs import parse_lammps_output | ||
|
||
|
||
def generate_fake_yaml(filename, documents, multiple_docs=True):
    """Write yaml content to a file.

    Args:
        filename: path of the yaml file to write
        documents: content to serialize; passed to yaml.dump_all when multiple_docs is True,
            and to yaml.dump otherwise
        multiple_docs: whether to write with yaml.dump_all (True) or yaml.dump (False)
    """
    # pick the serializer once, then write the YAML content
    write_yaml = yaml.dump_all if multiple_docs else yaml.dump
    with open(filename, 'w') as handle:
        write_yaml(documents, handle)
|
||
|
||
@pytest.fixture
def fake_lammps_yaml(tmpdir):
    """Write a fake LAMMPS dump file with 4 MD steps in 1D for 3 atoms and return its path."""
    keywords = ['id', 'type', 'x', 'fx']
    # one entry per MD step; each atom row follows the order of the keywords
    atoms_per_step = [
        [[0, 1, 0.1, 0.01], [1, 2, 0.2, 0.02], [2, 1, 0.3, 0.03]],
        [[0, 1, 1.1, 1.01], [1, 2, 1.2, 1.02], [2, 1, 1.3, 1.03]],
        [[0, 1, 2.1, 2.01], [1, 2, 2.2, 2.02], [2, 1, 2.3, 2.03]],
        [[0, 1, 3.1, 3.01], [1, 2, 3.2, 3.02], [2, 1, 3.3, 3.03]],
    ]
    # each document gets its own copy of the keywords so that no list object is shared between documents
    documents = [{'keywords': list(keywords), 'data': atoms} for atoms in atoms_per_step]
    dump_path = os.path.join(tmpdir, 'fake_lammps_dump.yaml')
    generate_fake_yaml(dump_path, documents)
    return dump_path
|
||
|
||
@pytest.fixture
def fake_thermo_yaml(tmpdir):
    """Write a fake LAMMPS thermo file with 4 MD steps and return its path."""
    # one [KinEng, PotEng] pair per MD step
    energies_per_step = [[0.4, 0.5], [1.4, 1.5], [2.4, 2.5], [3.4, 3.5]]
    thermo_document = dict(keywords=['KinEng', 'PotEng'], data=energies_per_step)
    thermo_path = os.path.join(tmpdir, 'fake_lammps_thermo.yaml')
    generate_fake_yaml(thermo_path, thermo_document, multiple_docs=False)
    return thermo_path
|
||
|
||
def test_parse_lammps_outputs(fake_lammps_yaml, fake_thermo_yaml, tmpdir):
    """Check that the fake dump and thermo files are parsed into the expected parquet table."""
    output_name = os.path.join(tmpdir, 'test.parquet')
    parse_lammps_output(fake_lammps_yaml, fake_thermo_yaml, output_name)
    # check that a file exists
    assert os.path.exists(output_name)

    df = pd.read_parquet(output_name)
    assert not df.empty

    assert len(df) == 4

    # column name -> (comparison function, expected value for a given MD step)
    checks = {
        'id': (np.array_equal, lambda step: [0, 1, 2]),
        'type': (np.array_equal, lambda step: [1, 2, 1]),
        'x': (np.allclose, lambda step: [step + 0.1 * atom for atom in range(1, 4)]),
        'fx': (np.allclose, lambda step: [step + 0.01 * atom for atom in range(1, 4)]),
        'energy': (np.allclose, lambda step: [2 * step + 0.9]),
    }
    for column, (matches, expected) in checks.items():
        assert column in df.keys()
        for step in range(4):
            assert matches(df[column][step], expected(step))