From b492a4b2b54b2557678916c08981539a2c1128ec Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 14:03:33 -0500 Subject: [PATCH 01/32] scripts to preprocess LAMMPS outputs --- crystal_diffusion/data/parse_lammps.sh | 11 +++ .../data/parse_lammps_outputs.py | 68 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100755 crystal_diffusion/data/parse_lammps.sh create mode 100644 crystal_diffusion/data/parse_lammps_outputs.py diff --git a/crystal_diffusion/data/parse_lammps.sh b/crystal_diffusion/data/parse_lammps.sh new file mode 100755 index 00000000..bcbb2079 --- /dev/null +++ b/crystal_diffusion/data/parse_lammps.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +EXP_DIR="lammps_scripts/Si/si-custom/" +DUMP_FILENAME="dump.si-300-1.yaml" +THERMO_FILENAME="thermo_log.yaml" +OUTPUT_NAME="demo.parquet" + +python crystal_diffusion/data/parse_lammps_outputs.py \ + --dump_file ${EXP_DIR}/${DUMP_FILENAME} \ + --thermo_file ${EXP_DIR}/${THERMO_FILENAME} \ + --output_name ${EXP_DIR}/${OUTPUT_NAME} diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py new file mode 100644 index 00000000..06c84bd5 --- /dev/null +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -0,0 +1,68 @@ +import argparse +import os + +import pandas as pd +import yaml + + +def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: str): + """Parse a LAMMPS output file and save in a .csv format. + + Args: + lammps_dump: LAMMPS output file + lammps_thermo_log: LAMMPS + output_name: name of parsed output written by the script + """ + if not os.path.exists(lammps_dump): + raise ValueError(f'{lammps_dump} does not exist. Please provide a valid LAMMPS dump file as yaml.') + + if not os.path.exists(lammps_thermo_log): + raise ValueError(f'{lammps_thermo_log} does not exist. Please provide a valid LAMMPS thermo log file as yaml.') + + # get the atom information (positions and forces) from the LAMMPS 'dump' file + with open(lammps_dump, 'r') as f: + dump_yaml = yaml.safe_load_all(f) + # every MD iteration is saved as a separate document in the yaml file + # prepare a dataframe to get all the data + pd_data = {} + for doc in dump_yaml: # loop over MD steps + if 'id' not in doc['keywords']: # sanity check + raise ValueError('id should be in LAMMPS dump file') + atoms_info = {} # store information on atoms positions and forces here + for data in doc['data']: # loop over the atoms to get their positions and forces + for key, v in zip(doc['keywords'], data): + print(key, v) + if key not in ['x', 'y', 'z', 'fx', 'fy', 'fz']: + continue + else: + atoms_info[key] = atoms_info.get(key, []) + [v] # get positions or forces + # add the information about that MD step to the dataframe + for k, v in atoms_info.items(): # k should be x, y, z, fx, fy, fz + pd_data[k] = pd_data.get(k, []) + [v] + + # get the total energy from the LAMMPS second output + with open(lammps_thermo_log, 'r') as f: + log_yaml = yaml.safe_load(f) + kin_idx = log_yaml['keywords'].index('KinEng') + pot_idx = log_yaml['keywords'].index('PotEng') + pd_data['energy'] = [x[kin_idx] + x[pot_idx] for x in log_yaml['data']] + + if not output_name.endswith('.parquet'): + output_name += '.parquet' + + pd.DataFrame(pd_data).to_parquet(output_name, engine='pyarrow') + + +def main(): + """Main script to parse LAMMPS files and output a single parquet file.""" + parser = argparse.ArgumentParser(description="Convert LAMMPS outputs in parquet file compatible with a dataloader.") + parser.add_argument("--dump_file", type=str, help="LAMMPS dump file in yaml format.") + parser.add_argument("--thermo_file", type=str, help="LAMMPS thermo output file in yaml format.") + parser.add_argument("--output_name", type=str, help="Output name") + args = parser.parse_args() + + parse_lammps_output(args.dump_file, args.thermo_file, args.output_name) + + +if __name__ == '__main__': + main() From eda4bbd3a95480c3544d96c50e4c02463f80d7ac Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 14:09:08 -0500 Subject: [PATCH 02/32] example scripts to run LAMMPS --- data/lammps_input_example.lammps | 31 +++++++++++++++++++++++++++++++ data/run_lammps_example.sh | 6 ++++++ 2 files changed, 37 insertions(+) create mode 100755 data/lammps_input_example.lammps create mode 100644 data/run_lammps_example.sh diff --git a/data/lammps_input_example.lammps b/data/lammps_input_example.lammps new file mode 100755 index 00000000..ba9d70a5 --- /dev/null +++ b/data/lammps_input_example.lammps @@ -0,0 +1,31 @@ +log log.si-${T}-${S}.lammps + +units metal +atom_style atomic +atom_modify map array + +lattice diamond 5.43 +region simbox block 0 ${S} 0 ${S} 0 ${S} +create_box 1 simbox +create_atoms 1 region simbox + +#read_dump ${DUMP} ${STEP} x y z vx vy vz fx fy fz box yes replace no purge yes add yes + +mass 1 28.0855 + +group Si type 1 + +pair_style sw +pair_coeff * * ../Si.sw Si + +velocity all create ${T} 62177 + +dump 1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz + +thermo_style yaml +thermo 1 +#==========================Output files======================== + +fix 1 all nvt temp ${T} ${T} 0.01 +run ${STEP} +unfix 1 diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh new file mode 100644 index 00000000..66ba52c6 --- /dev/null +++ b/data/run_lammps_example.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +TEMPERATURE=300 +BOX_SIZE=1 + +lmp < data/lammps_input_example -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file From ecae0825461b57e97a0458b6b78dc0a1ef6eded3 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 14:13:31 -0500 Subject: [PATCH 03/32] do not save index when writing parquet --- crystal_diffusion/data/parse_lammps_outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index 06c84bd5..d0fca2fe 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -50,7 +50,7 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s if not output_name.endswith('.parquet'): output_name += '.parquet' - pd.DataFrame(pd_data).to_parquet(output_name, engine='pyarrow') + pd.DataFrame(pd_data).to_parquet(output_name, engine='pyarrow', index=False) def main(): From f31573ce2f23ec8b783916bcf06164ebfcfcb11a Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 14:28:50 -0500 Subject: [PATCH 04/32] save atoms id and type in parquet --- crystal_diffusion/data/parse_lammps_outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index d0fca2fe..52033742 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -32,7 +32,7 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s for data in doc['data']: # loop over the atoms to get their positions and forces for key, v in zip(doc['keywords'], data): print(key, v) - if key not in ['x', 'y', 'z', 'fx', 'fy', 'fz']: + if key not in ['id', 'type', 'x', 'y', 'z', 'fx', 'fy', 'fz']: continue else: atoms_info[key] = atoms_info.get(key, []) + [v] # get positions or forces From 94eb4d1da314387fceb6c0b664b88fe9add81662 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:04:11 -0500 Subject: [PATCH 05/32] unit test --- tests/data/test_parse_lammps_output.py | 72 ++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 tests/data/test_parse_lammps_output.py diff --git a/tests/data/test_parse_lammps_output.py b/tests/data/test_parse_lammps_output.py new file mode 100644 index 00000000..4cf6d6c9 --- /dev/null +++ b/tests/data/test_parse_lammps_output.py @@ -0,0 +1,72 @@ +import os + +import numpy as np +import pandas as pd +import pytest +import yaml + +from crystal_diffusion.data.parse_lammps_outputs import parse_lammps_output + + +def generate_fake_yaml(filename, documents, multiple_docs=True): + # Write the YAML content + with open(filename, 'w') as yaml_file: + if multiple_docs: + yaml.dump_all(documents, yaml_file) + else: + yaml.dump(documents, yaml_file) + +@pytest.fixture +def fake_lammps_yaml(tmpdir): + # fake LAMMPS output file with 4 MD steps in 1D for 3 atoms + yaml_content = [ + {'keywords': ['id', 'type', 'x', 'fx'], + 'data': [[0, 1, 0.1, 0.01], [1, 2, 0.2, 0.02], [2, 1, 0.3, 0.03]]}, + {'keywords': ['id', 'type', 'x', 'fx'], + 'data': [[0, 1, 1.1, 1.01], [1, 2, 1.2, 1.02], [2, 1, 1.3, 1.03]]}, + {'keywords': ['id', 'type', 'x', 'fx'], + 'data': [[0, 1, 2.1, 2.01], [1, 2, 2.2, 2.02], [2, 1, 2.3, 2.03]]}, + {'keywords': ['id', 'type', 'x', 'fx'], + 'data': [[0, 1, 3.1, 3.01], [1, 2, 3.2, 3.02], [2, 1, 3.3, 3.03]]}, + ] + file = os.path.join(tmpdir, 'fake_lammps_dump.yaml') + generate_fake_yaml(file, yaml_content) + return file + + +@pytest.fixture +def fake_thermo_yaml(tmpdir): + # fake LAMMPS thermo file with 4 MD steps + yaml_content = { + 'keywords': ['KinEng', 'PotEng'], + 'data': [[0.4, 0.5], [1.4, 1.5], [2.4, 2.5], [3.4, 3.5]] + } + file = os.path.join(tmpdir, 'fake_lammps_thermo.yaml') + generate_fake_yaml(file, yaml_content, multiple_docs=False) + return file + + +def test_parse_lammps_outputs(fake_lammps_yaml, fake_thermo_yaml, tmpdir): + output_name = os.path.join(tmpdir, 'test.parquet') + parse_lammps_output(fake_lammps_yaml, fake_thermo_yaml, output_name) + # check that a file exists + assert os.path.exists(output_name) + + df = pd.read_parquet(output_name) + assert not df.empty + + assert len(df) == 4 + + for i, v in enumerate(['id', 'type', 'x', 'fx', 'energy']): + assert v in df.keys() + for x in range(4): + if v == 'id': + assert np.array_equal(df[v][x], [0, 1, 2]) + elif v == 'type': + assert np.array_equal(df[v][x], [1, 2, 1]) + elif v == 'x': + assert np.allclose(df[v][x], [x + 0.1 * y for y in range(1, 4)]) + elif v == 'fx': + assert np.allclose(df[v][x], [x + 0.01 * y for y in range(1, 4)]) + else: # v == 'energy' + assert np.allclose(df[v][x], [2 * x + 0.9]) \ No newline at end of file From 9a4f9e519230254594cb831afb539a5b8909e2f2 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:24:33 -0500 Subject: [PATCH 06/32] updated list of dependencies --- requirements.txt | 23 +++++++++++++++++++++++ setup.py | 28 +++++----------------------- 2 files changed, 28 insertions(+), 23 deletions(-) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..0ade435e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +datasets-2.17.1 +flake8==4.0.1 +flake8-docstrings==1.6.0 +gitpython==3.1.27 +jupyter==1.0.0 +jinja2==3.1.2 +myst-parser==2.0.0 +orion>=0.2.4.post1 +pyarrow-15.0.0 +pyyaml==6.0 +pytest==7.1.2 +pytest-cov==3.0.0 +pytorch_lightning>=2.2.0 +pytype==2024.2.13 +sphinx==7.2.6 +sphinx-autoapi==3.0.0 +sphinx-rtd-theme==2.0.0 +sphinxcontrib-napoleon==0.7 +sphinxcontrib-katex==0.8.6 +tensorboard==2.16.2 +tqdm==4.64.0 +torch==2.2.0 +torchvision>=0.17.0 diff --git a/setup.py b/setup.py index 9f1017c0..fb11dfb6 100644 --- a/setup.py +++ b/setup.py @@ -1,33 +1,15 @@ from setuptools import find_packages, setup + +with open('requirements.txt', 'r') as f: + requirements = f.readlines() + setup( name='crystal_diffusion', version='0.0.1', packages=find_packages(include=['crystal_diffusion', 'crystal_diffusion.*']), python_requires='>=3.11', - install_requires=[ - 'flake8==4.0.1', - 'flake8-docstrings==1.6.0', - 'gitpython==3.1.27', - 'jupyter==1.0.0', - 'jinja2==3.1.2', - 'myst-parser==2.0.0', - 'orion>=0.2.4.post1', - 'pyyaml==6.0', - 'pytest==7.1.2', - 'pytest-cov==3.0.0', - 'pytorch_lightning>=2.2.0', - 'pytype==2024.2.13', - 'sphinx==7.2.6', - 'sphinx-autoapi==3.0.0', - 'sphinx-rtd-theme==2.0.0', - 'sphinxcontrib-napoleon==0.7', - 'sphinxcontrib-katex==0.8.6', - 'tensorboard==2.16.2', - 'tqdm==4.64.0', - 'torch==2.2.0', - 'torchvision>=0.17.0', - ], + install_requires=requirements, entry_points={ 'console_scripts': [ 'cd-train=crystal_diffusion.train:main', From 3d9be3e7cf02b7868eef4092aa6c19bfae7be26c Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:32:44 -0500 Subject: [PATCH 07/32] flake8 errors --- crystal_diffusion/data/parse_lammps_outputs.py | 3 +-- tests/data/test_parse_lammps_output.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index 52033742..b23ddb5f 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -28,10 +28,9 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s for doc in dump_yaml: # loop over MD steps if 'id' not in doc['keywords']: # sanity check raise ValueError('id should be in LAMMPS dump file') - atoms_info = {} # store information on atoms positions and forces here + atoms_info = {} # store information on atoms positions and forces here for data in doc['data']: # loop over the atoms to get their positions and forces for key, v in zip(doc['keywords'], data): - print(key, v) if key not in ['id', 'type', 'x', 'y', 'z', 'fx', 'fy', 'fz']: continue else: diff --git a/tests/data/test_parse_lammps_output.py b/tests/data/test_parse_lammps_output.py index 4cf6d6c9..bb82ae52 100644 --- a/tests/data/test_parse_lammps_output.py +++ b/tests/data/test_parse_lammps_output.py @@ -16,6 +16,7 @@ def generate_fake_yaml(filename, documents, multiple_docs=True): else: yaml.dump(documents, yaml_file) + @pytest.fixture def fake_lammps_yaml(tmpdir): # fake LAMMPS output file with 4 MD steps in 1D for 3 atoms @@ -69,4 +70,4 @@ def test_parse_lammps_outputs(fake_lammps_yaml, fake_thermo_yaml, tmpdir): elif v == 'fx': assert np.allclose(df[v][x], [x + 0.01 * y for y in range(1, 4)]) else: # v == 'energy' - assert np.allclose(df[v][x], [2 * x + 0.9]) \ No newline at end of file + assert np.allclose(df[v][x], [2 * x + 0.9]) From 06d4b51d76c2f8ebea6c3eb5fdb65b188e96283f Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:37:02 -0500 Subject: [PATCH 08/32] isort fix --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index fb11dfb6..2a9715ba 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ from setuptools import find_packages, setup - with open('requirements.txt', 'r') as f: requirements = f.readlines() From 0a0ef8f361601ac2d4ad754f92f795301dfe6d8c Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:45:02 -0500 Subject: [PATCH 09/32] typo in requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0ade435e..3b402065 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -datasets-2.17.1 +datasets==2.17.1 flake8==4.0.1 flake8-docstrings==1.6.0 gitpython==3.1.27 From 8e6dc8498fbb161788fecb1975e44b696287d7ad Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:48:05 -0500 Subject: [PATCH 10/32] another typo in requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3b402065..c50c4cc3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ jupyter==1.0.0 jinja2==3.1.2 myst-parser==2.0.0 orion>=0.2.4.post1 -pyarrow-15.0.0 +pyarrow==15.0.0 pyyaml==6.0 pytest==7.1.2 pytest-cov==3.0.0 From b4e0c39779856b42bf58a5d30034b2d741f6ddba Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Fri, 23 Feb 2024 13:40:28 -0500 Subject: [PATCH 11/32] code review fixes --- data/lammps_input_example.lammps | 2 +- data/run_lammps_example.sh | 2 +- data/si.sw | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100755 data/si.sw diff --git a/data/lammps_input_example.lammps b/data/lammps_input_example.lammps index ba9d70a5..9531b57d 100755 --- a/data/lammps_input_example.lammps +++ b/data/lammps_input_example.lammps @@ -16,7 +16,7 @@ mass 1 28.0855 group Si type 1 pair_style sw -pair_coeff * * ../Si.sw Si +pair_coeff * * data/si.sw Si velocity all create ${T} 62177 diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh index 66ba52c6..f0262196 100644 --- a/data/run_lammps_example.sh +++ b/data/run_lammps_example.sh @@ -3,4 +3,4 @@ TEMPERATURE=300 BOX_SIZE=1 -lmp < data/lammps_input_example -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file +lmp < data/lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file diff --git a/data/si.sw b/data/si.sw new file mode 100755 index 00000000..26cfc25b --- /dev/null +++ b/data/si.sw @@ -0,0 +1,22 @@ +# Stillinger-Weber parameters for various elements and mixtures +# multiple entries can be added to this file, LAMMPS reads the ones it needs +# these entries are in LAMMPS "metal" units: +# epsilon = eV; sigma = Angstroms +# other quantities are unitless + +# format of a single entry (one or more lines): +# element 1, element 2, element 3, +# epsilon, sigma, a, lambda, gamma, costheta0, A, B, p, q, tol + +# Here are the original parameters in metal units, for Silicon from: +# +# Stillinger and Weber, Phys. Rev. B, v. 31, p. 5262, (1985) +# +# Parameters for 'dia' Si +Si Si Si 2.1683 2.0951 1.80 21.0 1.20 -0.333333333333 + 7.049556277 0.6022245584 4.0 0.0 0.0 +# +# Parameters for amorphous Si with the modified SW potential +#(R. L. C. Vink, G. T. Barkema, W. F. van der Weg et N. Mousseau, A semi-empirical potential for amorphous silicon, J. Non-Cryst. Sol. 282, 248-255 (2001)) +#Si Si Si 1.64833 2.0951 1.80 31.5 1.20 -0.333333333333 +# 7.049556277 0.6022245584 4.0 0.0 0.0 From bf01d7bcd4d378c2aa9de28b92f3669e420d3fa7 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Fri, 23 Feb 2024 15:08:22 -0500 Subject: [PATCH 12/32] code review part 2 --- crystal_diffusion/data/parse_lammps_outputs.py | 2 +- data/lammps_input_example.lammps | 2 +- data/run_lammps_example.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index b23ddb5f..0aecf232 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -10,7 +10,7 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s Args: lammps_dump: LAMMPS output file - lammps_thermo_log: LAMMPS + lammps_thermo_log: LAMMPS thermodynamic variables output file output_name: name of parsed output written by the script """ if not os.path.exists(lammps_dump): diff --git a/data/lammps_input_example.lammps b/data/lammps_input_example.lammps index 9531b57d..4aca590e 100755 --- a/data/lammps_input_example.lammps +++ b/data/lammps_input_example.lammps @@ -16,7 +16,7 @@ mass 1 28.0855 group Si type 1 pair_style sw -pair_coeff * * data/si.sw Si +pair_coeff * *si.sw Si velocity all create ${T} 62177 diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh index f0262196..4e639cfe 100644 --- a/data/run_lammps_example.sh +++ b/data/run_lammps_example.sh @@ -3,4 +3,4 @@ TEMPERATURE=300 BOX_SIZE=1 -lmp < data/lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file +lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file From 5927511b163780d0be25d5db4a0eb8868b12f8b2 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Fri, 23 Feb 2024 15:20:46 -0500 Subject: [PATCH 13/32] missing space --- data/lammps_input_example.lammps | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/lammps_input_example.lammps b/data/lammps_input_example.lammps index 4aca590e..c2f77445 100755 --- a/data/lammps_input_example.lammps +++ b/data/lammps_input_example.lammps @@ -16,7 +16,7 @@ mass 1 28.0855 group Si type 1 pair_style sw -pair_coeff * *si.sw Si +pair_coeff * * si.sw Si velocity all create ${T} 62177 From da2b08b9c079550a2b73fa9f9e64ac832094a3d1 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 14:03:33 -0500 Subject: [PATCH 14/32] scripts to preprocess LAMMPS outputs --- crystal_diffusion/data/parse_lammps.sh | 11 +++ .../data/parse_lammps_outputs.py | 68 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100755 crystal_diffusion/data/parse_lammps.sh create mode 100644 crystal_diffusion/data/parse_lammps_outputs.py diff --git a/crystal_diffusion/data/parse_lammps.sh b/crystal_diffusion/data/parse_lammps.sh new file mode 100755 index 00000000..bcbb2079 --- /dev/null +++ b/crystal_diffusion/data/parse_lammps.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +EXP_DIR="lammps_scripts/Si/si-custom/" +DUMP_FILENAME="dump.si-300-1.yaml" +THERMO_FILENAME="thermo_log.yaml" +OUTPUT_NAME="demo.parquet" + +python crystal_diffusion/data/parse_lammps_outputs.py \ + --dump_file ${EXP_DIR}/${DUMP_FILENAME} \ + --thermo_file ${EXP_DIR}/${THERMO_FILENAME} \ + --output_name ${EXP_DIR}/${OUTPUT_NAME} diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py new file mode 100644 index 00000000..06c84bd5 --- /dev/null +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -0,0 +1,68 @@ +import argparse +import os + +import pandas as pd +import yaml + + +def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: str): + """Parse a LAMMPS output file and save in a .csv format. + + Args: + lammps_dump: LAMMPS output file + lammps_thermo_log: LAMMPS + output_name: name of parsed output written by the script + """ + if not os.path.exists(lammps_dump): + raise ValueError(f'{lammps_dump} does not exist. Please provide a valid LAMMPS dump file as yaml.') + + if not os.path.exists(lammps_thermo_log): + raise ValueError(f'{lammps_thermo_log} does not exist. Please provide a valid LAMMPS thermo log file as yaml.') + + # get the atom information (positions and forces) from the LAMMPS 'dump' file + with open(lammps_dump, 'r') as f: + dump_yaml = yaml.safe_load_all(f) + # every MD iteration is saved as a separate document in the yaml file + # prepare a dataframe to get all the data + pd_data = {} + for doc in dump_yaml: # loop over MD steps + if 'id' not in doc['keywords']: # sanity check + raise ValueError('id should be in LAMMPS dump file') + atoms_info = {} # store information on atoms positions and forces here + for data in doc['data']: # loop over the atoms to get their positions and forces + for key, v in zip(doc['keywords'], data): + print(key, v) + if key not in ['x', 'y', 'z', 'fx', 'fy', 'fz']: + continue + else: + atoms_info[key] = atoms_info.get(key, []) + [v] # get positions or forces + # add the information about that MD step to the dataframe + for k, v in atoms_info.items(): # k should be x, y, z, fx, fy, fz + pd_data[k] = pd_data.get(k, []) + [v] + + # get the total energy from the LAMMPS second output + with open(lammps_thermo_log, 'r') as f: + log_yaml = yaml.safe_load(f) + kin_idx = log_yaml['keywords'].index('KinEng') + pot_idx = log_yaml['keywords'].index('PotEng') + pd_data['energy'] = [x[kin_idx] + x[pot_idx] for x in log_yaml['data']] + + if not output_name.endswith('.parquet'): + output_name += '.parquet' + + pd.DataFrame(pd_data).to_parquet(output_name, engine='pyarrow') + + +def main(): + """Main script to parse LAMMPS files and output a single parquet file.""" + parser = argparse.ArgumentParser(description="Convert LAMMPS outputs in parquet file compatible with a dataloader.") + parser.add_argument("--dump_file", type=str, help="LAMMPS dump file in yaml format.") + parser.add_argument("--thermo_file", type=str, help="LAMMPS thermo output file in yaml format.") + parser.add_argument("--output_name", type=str, help="Output name") + args = parser.parse_args() + + parse_lammps_output(args.dump_file, args.thermo_file, args.output_name) + + +if __name__ == '__main__': + main() From 031c975ed11b9343a2d19c3ef8c90ce2e9d63342 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 14:09:08 -0500 Subject: [PATCH 15/32] example scripts to run LAMMPS --- data/lammps_input_example.lammps | 31 +++++++++++++++++++++++++++++++ data/run_lammps_example.sh | 6 ++++++ 2 files changed, 37 insertions(+) create mode 100755 data/lammps_input_example.lammps create mode 100644 data/run_lammps_example.sh diff --git a/data/lammps_input_example.lammps b/data/lammps_input_example.lammps new file mode 100755 index 00000000..ba9d70a5 --- /dev/null +++ b/data/lammps_input_example.lammps @@ -0,0 +1,31 @@ +log log.si-${T}-${S}.lammps + +units metal +atom_style atomic +atom_modify map array + +lattice diamond 5.43 +region simbox block 0 ${S} 0 ${S} 0 ${S} +create_box 1 simbox +create_atoms 1 region simbox + +#read_dump ${DUMP} ${STEP} x y z vx vy vz fx fy fz box yes replace no purge yes add yes + +mass 1 28.0855 + +group Si type 1 + +pair_style sw +pair_coeff * * ../Si.sw Si + +velocity all create ${T} 62177 + +dump 1 all yaml 1 dump.si-${T}-${S}.yaml id type x y z fx fy fz + +thermo_style yaml +thermo 1 +#==========================Output files======================== + +fix 1 all nvt temp ${T} ${T} 0.01 +run ${STEP} +unfix 1 diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh new file mode 100644 index 00000000..66ba52c6 --- /dev/null +++ b/data/run_lammps_example.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +TEMPERATURE=300 +BOX_SIZE=1 + +lmp < data/lammps_input_example -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file From ba9baa0443b40672ed9cffd355da7b3ee43b19f3 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 14:13:31 -0500 Subject: [PATCH 16/32] do not save index when writing parquet --- crystal_diffusion/data/parse_lammps_outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index 06c84bd5..d0fca2fe 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -50,7 +50,7 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s if not output_name.endswith('.parquet'): output_name += '.parquet' - pd.DataFrame(pd_data).to_parquet(output_name, engine='pyarrow') + pd.DataFrame(pd_data).to_parquet(output_name, engine='pyarrow', index=False) def main(): From 04d76c42f59e6ea9f589ca39b6c257a59d2939c9 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 14:28:50 -0500 Subject: [PATCH 17/32] save atoms id and type in parquet --- crystal_diffusion/data/parse_lammps_outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index d0fca2fe..52033742 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -32,7 +32,7 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s for data in doc['data']: # loop over the atoms to get their positions and forces for key, v in zip(doc['keywords'], data): print(key, v) - if key not in ['x', 'y', 'z', 'fx', 'fy', 'fz']: + if key not in ['id', 'type', 'x', 'y', 'z', 'fx', 'fy', 'fz']: continue else: atoms_info[key] = atoms_info.get(key, []) + [v] # get positions or forces From c7186bda8eec17850d46930f728cd93247df0309 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:04:11 -0500 Subject: [PATCH 18/32] unit test --- tests/data/test_parse_lammps_output.py | 72 ++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 tests/data/test_parse_lammps_output.py diff --git a/tests/data/test_parse_lammps_output.py b/tests/data/test_parse_lammps_output.py new file mode 100644 index 00000000..4cf6d6c9 --- /dev/null +++ b/tests/data/test_parse_lammps_output.py @@ -0,0 +1,72 @@ +import os + +import numpy as np +import pandas as pd +import pytest +import yaml + +from crystal_diffusion.data.parse_lammps_outputs import parse_lammps_output + + +def generate_fake_yaml(filename, documents, multiple_docs=True): + # Write the YAML content + with open(filename, 'w') as yaml_file: + if multiple_docs: + yaml.dump_all(documents, yaml_file) + else: + yaml.dump(documents, yaml_file) + +@pytest.fixture +def fake_lammps_yaml(tmpdir): + # fake LAMMPS output file with 4 MD steps in 1D for 3 atoms + yaml_content = [ + {'keywords': ['id', 'type', 'x', 'fx'], + 'data': [[0, 1, 0.1, 0.01], [1, 2, 0.2, 0.02], [2, 1, 0.3, 0.03]]}, + {'keywords': ['id', 'type', 'x', 'fx'], + 'data': [[0, 1, 1.1, 1.01], [1, 2, 1.2, 1.02], [2, 1, 1.3, 1.03]]}, + {'keywords': ['id', 'type', 'x', 'fx'], + 'data': [[0, 1, 2.1, 2.01], [1, 2, 2.2, 2.02], [2, 1, 2.3, 2.03]]}, + {'keywords': ['id', 'type', 'x', 'fx'], + 'data': [[0, 1, 3.1, 3.01], [1, 2, 3.2, 3.02], [2, 1, 3.3, 3.03]]}, + ] + file = os.path.join(tmpdir, 'fake_lammps_dump.yaml') + generate_fake_yaml(file, yaml_content) + return file + + +@pytest.fixture +def fake_thermo_yaml(tmpdir): + # fake LAMMPS thermo file with 4 MD steps + yaml_content = { + 'keywords': ['KinEng', 'PotEng'], + 'data': [[0.4, 0.5], [1.4, 1.5], [2.4, 2.5], [3.4, 3.5]] + } + file = os.path.join(tmpdir, 'fake_lammps_thermo.yaml') + generate_fake_yaml(file, yaml_content, multiple_docs=False) + return file + + +def test_parse_lammps_outputs(fake_lammps_yaml, fake_thermo_yaml, tmpdir): + output_name = os.path.join(tmpdir, 'test.parquet') + parse_lammps_output(fake_lammps_yaml, fake_thermo_yaml, output_name) + # check that a file exists + assert os.path.exists(output_name) + + df = pd.read_parquet(output_name) + assert not df.empty + + assert len(df) == 4 + + for i, v in enumerate(['id', 'type', 'x', 'fx', 'energy']): + assert v in df.keys() + for x in range(4): + if v == 'id': + assert np.array_equal(df[v][x], [0, 1, 2]) + elif v == 'type': + assert np.array_equal(df[v][x], [1, 2, 1]) + elif v == 'x': + assert np.allclose(df[v][x], [x + 0.1 * y for y in range(1, 4)]) + elif v == 'fx': + assert np.allclose(df[v][x], [x + 0.01 * y for y in range(1, 4)]) + else: # v == 'energy' + assert np.allclose(df[v][x], [2 * x + 0.9]) \ No newline at end of file From 4844f7c9e80f70dadd81c0e69d611f5f92f58255 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:24:33 -0500 Subject: [PATCH 19/32] updated list of dependencies --- requirements.txt | 23 +++++++++++++++++++++++ setup.py | 29 +++++------------------------ 2 files changed, 28 insertions(+), 24 deletions(-) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..0ade435e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +datasets-2.17.1 +flake8==4.0.1 +flake8-docstrings==1.6.0 +gitpython==3.1.27 +jupyter==1.0.0 +jinja2==3.1.2 +myst-parser==2.0.0 +orion>=0.2.4.post1 +pyarrow-15.0.0 +pyyaml==6.0 +pytest==7.1.2 +pytest-cov==3.0.0 +pytorch_lightning>=2.2.0 +pytype==2024.2.13 +sphinx==7.2.6 +sphinx-autoapi==3.0.0 +sphinx-rtd-theme==2.0.0 +sphinxcontrib-napoleon==0.7 +sphinxcontrib-katex==0.8.6 +tensorboard==2.16.2 +tqdm==4.64.0 +torch==2.2.0 +torchvision>=0.17.0 diff --git a/setup.py b/setup.py index 0db43ec0..fb11dfb6 100644 --- a/setup.py +++ b/setup.py @@ -1,34 +1,15 @@ from setuptools import find_packages, setup + +with open('requirements.txt', 'r') as f: + requirements = f.readlines() + setup( name='crystal_diffusion', version='0.0.1', packages=find_packages(include=['crystal_diffusion', 'crystal_diffusion.*']), python_requires='>=3.11', - install_requires=[ - 'flake8==4.0.1', - 'flake8-docstrings==1.6.0', - 'isort==5.13.2', - 'gitpython==3.1.27', - 'jupyter==1.0.0', - 'jinja2==3.1.2', - 'myst-parser==2.0.0', - 'orion>=0.2.4.post1', - 'pyyaml==6.0', - 'pytest==7.1.2', - 'pytest-cov==3.0.0', - 'pytorch_lightning>=2.2.0', - 'pytype==2024.2.13', - 'sphinx==7.2.6', - 'sphinx-autoapi==3.0.0', - 'sphinx-rtd-theme==2.0.0', - 'sphinxcontrib-napoleon==0.7', - 'sphinxcontrib-katex==0.8.6', - 'tensorboard==2.16.2', - 'tqdm==4.64.0', - 'torch==2.2.0', - 'torchvision>=0.17.0', - ], + install_requires=requirements, entry_points={ 'console_scripts': [ 'cd-train=crystal_diffusion.train:main', From a1f5bf58a20c803cd574a317577f37294faf514e Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:32:44 -0500 Subject: [PATCH 20/32] flake8 errors --- crystal_diffusion/data/parse_lammps_outputs.py | 3 +-- tests/data/test_parse_lammps_output.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index 52033742..b23ddb5f 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -28,10 +28,9 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s for doc in dump_yaml: # loop over MD steps if 'id' not in doc['keywords']: # sanity check raise ValueError('id should be in LAMMPS dump file') - atoms_info = {} # store information on atoms positions and forces here + atoms_info = {} # store information on atoms positions and forces here for data in doc['data']: # loop over the atoms to get their positions and forces for key, v in zip(doc['keywords'], data): - print(key, v) if key not in ['id', 'type', 'x', 'y', 'z', 'fx', 'fy', 'fz']: continue else: diff --git a/tests/data/test_parse_lammps_output.py b/tests/data/test_parse_lammps_output.py index 4cf6d6c9..bb82ae52 100644 --- a/tests/data/test_parse_lammps_output.py +++ b/tests/data/test_parse_lammps_output.py @@ -16,6 +16,7 @@ def generate_fake_yaml(filename, documents, multiple_docs=True): else: yaml.dump(documents, yaml_file) + @pytest.fixture def fake_lammps_yaml(tmpdir): # fake LAMMPS output file with 4 MD steps in 1D for 3 atoms @@ -69,4 +70,4 @@ def test_parse_lammps_outputs(fake_lammps_yaml, fake_thermo_yaml, tmpdir): elif v == 'fx': assert np.allclose(df[v][x], [x + 0.01 * y for y in range(1, 4)]) else: # v == 'energy' - assert np.allclose(df[v][x], [2 * x + 0.9]) \ No newline at end of file + assert np.allclose(df[v][x], [2 * x + 0.9]) From b9affd444cd81cf3e270a245c58b25bc71f839a8 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:37:02 -0500 Subject: [PATCH 21/32] isort fix --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index fb11dfb6..2a9715ba 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ from setuptools import find_packages, setup - with open('requirements.txt', 'r') as f: requirements = f.readlines() From c93d82d24dd14573f08b37af714a561653361296 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:45:02 -0500 Subject: [PATCH 22/32] typo in requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0ade435e..3b402065 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -datasets-2.17.1 +datasets==2.17.1 flake8==4.0.1 flake8-docstrings==1.6.0 gitpython==3.1.27 From fe06eba1ec0edb3ec02859c58e740598f6bc72c0 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Thu, 22 Feb 2024 15:48:05 -0500 Subject: [PATCH 23/32] another typo in requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3b402065..c50c4cc3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ jupyter==1.0.0 jinja2==3.1.2 myst-parser==2.0.0 orion>=0.2.4.post1 -pyarrow-15.0.0 +pyarrow==15.0.0 pyyaml==6.0 pytest==7.1.2 pytest-cov==3.0.0 From cd78eb0ba50881507e965d114cd4706e73cddd45 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Fri, 23 Feb 2024 13:40:28 -0500 Subject: [PATCH 24/32] code review fixes --- data/lammps_input_example.lammps | 2 +- data/run_lammps_example.sh | 2 +- data/si.sw | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100755 data/si.sw diff --git a/data/lammps_input_example.lammps b/data/lammps_input_example.lammps index ba9d70a5..9531b57d 100755 --- a/data/lammps_input_example.lammps +++ b/data/lammps_input_example.lammps @@ -16,7 +16,7 @@ mass 1 28.0855 group Si type 1 pair_style sw -pair_coeff * * ../Si.sw Si +pair_coeff * * data/si.sw Si velocity all create ${T} 62177 diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh index 66ba52c6..f0262196 100644 --- a/data/run_lammps_example.sh +++ b/data/run_lammps_example.sh @@ -3,4 +3,4 @@ TEMPERATURE=300 BOX_SIZE=1 -lmp < data/lammps_input_example -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file +lmp < data/lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file diff --git a/data/si.sw b/data/si.sw new file mode 100755 index 00000000..26cfc25b --- /dev/null +++ b/data/si.sw @@ -0,0 +1,22 @@ +# Stillinger-Weber parameters for various elements and mixtures +# multiple entries can be added to this file, LAMMPS reads the ones it needs +# these entries are in LAMMPS "metal" units: +# epsilon = eV; sigma = Angstroms +# other quantities are unitless + +# format of a single entry (one or more lines): +# element 1, element 2, element 3, +# epsilon, sigma, a, lambda, gamma, costheta0, A, B, p, q, tol + +# Here are the original parameters in metal units, for Silicon from: +# +# Stillinger and Weber, Phys. Rev. B, v. 31, p. 5262, (1985) +# +# Parameters for 'dia' Si +Si Si Si 2.1683 2.0951 1.80 21.0 1.20 -0.333333333333 + 7.049556277 0.6022245584 4.0 0.0 0.0 +# +# Parameters for amorphous Si with the modified SW potential +#(R. L. C. Vink, G. T. Barkema, W. F. van der Weg et N. Mousseau, A semi-empirical potential for amorphous silicon, J. Non-Cryst. Sol. 282, 248-255 (2001)) +#Si Si Si 1.64833 2.0951 1.80 31.5 1.20 -0.333333333333 +# 7.049556277 0.6022245584 4.0 0.0 0.0 From 9a3e24342c93e10ffc21747517ef9ee95093bfa8 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Fri, 23 Feb 2024 15:08:22 -0500 Subject: [PATCH 25/32] code review part 2 --- crystal_diffusion/data/parse_lammps_outputs.py | 2 +- data/lammps_input_example.lammps | 2 +- data/run_lammps_example.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index b23ddb5f..0aecf232 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -10,7 +10,7 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s Args: lammps_dump: LAMMPS output file - lammps_thermo_log: LAMMPS + lammps_thermo_log: LAMMPS thermodynamic variables output file output_name: name of parsed output written by the script """ if not os.path.exists(lammps_dump): diff --git a/data/lammps_input_example.lammps b/data/lammps_input_example.lammps index 9531b57d..4aca590e 100755 --- a/data/lammps_input_example.lammps +++ b/data/lammps_input_example.lammps @@ -16,7 +16,7 @@ mass 1 28.0855 group Si type 1 pair_style sw -pair_coeff * * data/si.sw Si +pair_coeff * *si.sw Si velocity all create ${T} 62177 diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh index f0262196..4e639cfe 100644 --- a/data/run_lammps_example.sh +++ b/data/run_lammps_example.sh @@ -3,4 +3,4 @@ TEMPERATURE=300 BOX_SIZE=1 -lmp < data/lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file +lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file From 2df251888c4b25e135781d626f6ce758e80cd87b Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Fri, 23 Feb 2024 15:20:46 -0500 Subject: [PATCH 26/32] missing space --- data/lammps_input_example.lammps | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/lammps_input_example.lammps b/data/lammps_input_example.lammps index 4aca590e..c2f77445 100755 --- a/data/lammps_input_example.lammps +++ b/data/lammps_input_example.lammps @@ -16,7 +16,7 @@ mass 1 28.0855 group Si type 1 pair_style sw -pair_coeff * *si.sw Si +pair_coeff * * si.sw Si velocity all create ${T} 62177 From 7c20038fd59d74ddc800a27e64d8e621db7be637 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Mon, 26 Feb 2024 08:14:01 -0500 Subject: [PATCH 27/32] isort in requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index c50c4cc3..cc765920 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ datasets==2.17.1 flake8==4.0.1 flake8-docstrings==1.6.0 gitpython==3.1.27 +isort==5.13.2 jupyter==1.0.0 jinja2==3.1.2 myst-parser==2.0.0 From ea7664beef6fa0af70549578bf3716085141e2bc Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Mon, 26 Feb 2024 08:21:56 -0500 Subject: [PATCH 28/32] fixes from code review --- crystal_diffusion/data/parse_lammps_outputs.py | 11 ++++++----- {crystal_diffusion/data => data}/parse_lammps.sh | 0 data/run_lammps_example.sh | 5 ++++- 3 files changed, 10 insertions(+), 6 deletions(-) rename {crystal_diffusion/data => data}/parse_lammps.sh (100%) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index 0aecf232..f118067c 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -1,5 +1,6 @@ import argparse import os +from collections import defaultdict import pandas as pd import yaml @@ -24,20 +25,20 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s dump_yaml = yaml.safe_load_all(f) # every MD iteration is saved as a separate document in the yaml file # prepare a dataframe to get all the data - pd_data = {} + pd_data = defaultdict(list) for doc in dump_yaml: # loop over MD steps if 'id' not in doc['keywords']: # sanity check raise ValueError('id should be in LAMMPS dump file') - atoms_info = {} # store information on atoms positions and forces here + atoms_info = defaultdict(list) # store information on atoms positions and forces here for data in doc['data']: # loop over the atoms to get their positions and forces for key, v in zip(doc['keywords'], data): if key not in ['id', 'type', 'x', 'y', 'z', 'fx', 'fy', 'fz']: continue else: - atoms_info[key] = atoms_info.get(key, []) + [v] # get positions or forces + atoms_info[key].append(v) # get positions or forces # add the information about that MD step to the dataframe - for k, v in atoms_info.items(): # k should be x, y, z, fx, fy, fz - pd_data[k] = pd_data.get(k, []) + [v] + for k, v in atoms_info.items(): # k should be id, type, x, y, z, fx, fy, fz + pd_data[k].append(v) # get the total energy from the LAMMPS second output with open(lammps_thermo_log, 'r') as f: diff --git a/crystal_diffusion/data/parse_lammps.sh b/data/parse_lammps.sh similarity index 100% rename from crystal_diffusion/data/parse_lammps.sh rename to data/parse_lammps.sh diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh index 4e639cfe..7f17d2e1 100644 --- a/data/run_lammps_example.sh +++ b/data/run_lammps_example.sh @@ -3,4 +3,7 @@ TEMPERATURE=300 BOX_SIZE=1 -lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE \ No newline at end of file +lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE + +# extract the thermodynamic outputs in a yaml file +egrep '^(keywords:|data:$|---$|\.\.\.$| - \[)' log.lammps > log.yaml \ No newline at end of file From 9550c930d8c89ea1d88a83c1498cc0fa6fa13e8b Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Mon, 26 Feb 2024 08:28:11 -0500 Subject: [PATCH 29/32] cleaning files from error in rebase --- crystal_diffusion/data/parse_lammps.sh | 11 ----------- data/run_lammps_example.sh | 3 +-- 2 files changed, 1 insertion(+), 13 deletions(-) delete mode 100755 crystal_diffusion/data/parse_lammps.sh diff --git a/crystal_diffusion/data/parse_lammps.sh b/crystal_diffusion/data/parse_lammps.sh deleted file mode 100755 index bcbb2079..00000000 --- a/crystal_diffusion/data/parse_lammps.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -EXP_DIR="lammps_scripts/Si/si-custom/" -DUMP_FILENAME="dump.si-300-1.yaml" -THERMO_FILENAME="thermo_log.yaml" -OUTPUT_NAME="demo.parquet" - -python crystal_diffusion/data/parse_lammps_outputs.py \ - --dump_file ${EXP_DIR}/${DUMP_FILENAME} \ - --thermo_file ${EXP_DIR}/${THERMO_FILENAME} \ - --output_name ${EXP_DIR}/${OUTPUT_NAME} diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh index 526ab608..b521fc6b 100644 --- a/data/run_lammps_example.sh +++ b/data/run_lammps_example.sh @@ -8,6 +8,5 @@ lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE # extract the thermodynamic outputs in a yaml file egrep '^(keywords:|data:$|---$|\.\.\.$| - \[)' log.lammps > log.yaml -======= + lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE ->>>>>>> 5927511b163780d0be25d5db4a0eb8868b12f8b2 From fe1120c2224b14884eb6cfcf9e2c6f9b03a5c1a4 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Mon, 26 Feb 2024 08:29:28 -0500 Subject: [PATCH 30/32] error in run_lammps --- data/run_lammps_example.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh index b521fc6b..ef6d9399 100644 --- a/data/run_lammps_example.sh +++ b/data/run_lammps_example.sh @@ -8,5 +8,3 @@ lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE # extract the thermodynamic outputs in a yaml file egrep '^(keywords:|data:$|---$|\.\.\.$| - \[)' log.lammps > log.yaml - -lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE From 8d696dfb17b4bc0f852a8eadc62ab2413c66b4ae Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Mon, 26 Feb 2024 08:36:45 -0500 Subject: [PATCH 31/32] linting error --- crystal_diffusion/data/parse_lammps_outputs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crystal_diffusion/data/parse_lammps_outputs.py b/crystal_diffusion/data/parse_lammps_outputs.py index db84fe99..ad6753ea 100644 --- a/crystal_diffusion/data/parse_lammps_outputs.py +++ b/crystal_diffusion/data/parse_lammps_outputs.py @@ -29,8 +29,7 @@ def parse_lammps_output(lammps_dump: str, lammps_thermo_log: str, output_name: s for doc in dump_yaml: # loop over MD steps if 'id' not in doc['keywords']: # sanity check raise ValueError('id should be in LAMMPS dump file') - atoms_info = defaultdict(list) # store information on atoms positions and forces here - + atoms_info = defaultdict(list) # store information on atoms positions and forces here for data in doc['data']: # loop over the atoms to get their positions and forces for key, v in zip(doc['keywords'], data): if key not in ['id', 'type', 'x', 'y', 'z', 'fx', 'fy', 'fz']: From a75654def03d2e560840968e4e0c7e9bccdb32a7 Mon Sep 17 00:00:00 2001 From: Simon Blackburn Date: Tue, 27 Feb 2024 08:11:22 -0500 Subject: [PATCH 32/32] git rebase error fix --- data/run_lammps_example.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/data/run_lammps_example.sh b/data/run_lammps_example.sh index ef6d9399..60ea1792 100644 --- a/data/run_lammps_example.sh +++ b/data/run_lammps_example.sh @@ -3,7 +3,6 @@ TEMPERATURE=300 BOX_SIZE=1 -<<<<<<< HEAD lmp < lammps_input_example.lammps -v STEP 10 -v T $TEMPERATURE -v S $BOX_SIZE # extract the thermodynamic outputs in a yaml file