diff --git a/.github/workflows/pytests.yml b/.github/workflows/pytests.yml index eac67404..227a1813 100644 --- a/.github/workflows/pytests.yml +++ b/.github/workflows/pytests.yml @@ -14,36 +14,40 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ 3.8 ] + python-version: [ "3.9" ] max-parallel: 5 env: - coverage-on-version: 3.8 + coverage-on-version: "3.9" use-mpi: True + defaults: + run: + shell: bash -l {0} steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + + - name: Set up python via conda + uses: conda-incubator/setup-miniconda@v3 with: + auto-update-conda: true python-version: ${{ matrix.python-version }} - - name: Add conda to system path + - name: Check python version run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - - # - name: Install Dependencies from Conda - # run: conda env update --file=devtools/conda-envs/environment.yml --name=base + which python3 + python3 --version - - name: Install pip from Conda - run: conda install pip + - name: Install dependencies from pip + run: python3 -m pip install wheel setuptools numpy scipy click matplotlib pyyaml spglib rdkit flake8 pytest pytest-cov requests - - name: Install dependencies from pip (some will already be taken care of by conda's phonop3py and its dependencies) - run: pip install wheel setuptools numpy scipy click matplotlib pandas pyyaml spglib rdkit-pypi flake8 pytest pytest-cov + - name: Check numpy + run: | + python3 -m pip list | grep numpy + python3 -c "import numpy; print(numpy.__file__, numpy.__version__)" - name: Install latest ASE from gitlab run: | - pip install git+https://gitlab.com/ase/ase.git + python3 -m pip install git+https://gitlab.com/ase/ase.git echo -n "ASE VERSION " python3 -c "import ase; print(ase.__file__, ase.__version__)" @@ -87,12 +91,20 @@ jobs: cd .. 
- name: Install Quippy from PyPI - run: pip install quippy-ase + run: python3 -m pip install quippy-ase + + - name: Install xTB (before things that need pandas like MACE and wfl, since it will break pandas-numpy compatibility by downgrading numpy) + run: | + conda install -c conda-forge xtb-python + python3 -m pip install typing-extensions + # install pandas now to encourage compatible numpy version after conda regressed it + python3 -m pip install pandas - name: MACE run: | echo "search for torch version" - torch_version=$( pip3 install torch== 2>&1 | fgrep 'from versions' | sed -e 's/.* //' -e 's/)//' ) + set +o pipefail + torch_version=$( python3 -m pip install torch== 2>&1 | fgrep 'from versions' | sed -e 's/.* //' -e 's/)//' ) echo "found torch version $torch_version, installing cpu-only variant" python3 -m pip install torch==${torch_version}+cpu -f https://download.pytorch.org/whl/torch_stable.html echo "installing mace" @@ -101,7 +113,7 @@ jobs: - name: Julia and ace fit run: | - pip install pip install threadpoolctl + python3 -m pip install threadpoolctl wget https://julialang-s3.julialang.org/bin/linux/x64/1.8/julia-1.8.1-linux-x86_64.tar.gz tar xzf julia-1.8.1-linux-x86_64.tar.gz # note that this hardwires a particular compatible ACE1pack version @@ -109,7 +121,7 @@ jobs: ${PWD}/julia-1.8.1/bin/julia ace1pack_install.jl - name: Install wfl (expyre and universalSOAP are dependencies) - run: pip install . + run: python3 -m pip install . 
- name: Install Quantum Espresso run: | @@ -138,17 +150,12 @@ jobs: run: | echo $HOME/bin >> $GITHUB_PATH - - name: Install xTB - run: | - conda install -c conda-forge xtb-python - pip install typing-extensions - - name: Install MPI dependencies if: env.use-mpi run: | # this can eaily be turned off if needed conda install -c conda-forge mpi4py openmpi pytest-mpi - pip install mpipool + python3 -m pip install mpipool - name: Install and configure slurm and ExPyRe run: | @@ -203,6 +210,10 @@ jobs: - name: Test with pytest - coverage if: env.coverage-on-version == matrix.python-version run: | + echo "BOB pre actual pytest" + which python3 + python3 -m pip list | grep numpy + python3 -c "import numpy; print(numpy.__file__, numpy.__version__)" rm -rf $HOME/pytest_cov mkdir $HOME/pytest_cov # diff --git a/README.md b/README.md index 7cdcbdb4..d13d5ba0 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,29 @@ The main functions of Workflow is to efficiently parallelise operations over a s For examples and more information see [documentation](https://libatoms.github.io/workflow/) +NOTE: because of the very large time intervals between official ASE releases, `wfl` is typically +set up for (and tested against) the latest ASE gitlab repo `master` branch. Recent changes +that require this support include variable cell minimization using `FrechetCellFilter` and +`Espresso` calculator configuration. See documentation link above for installation instructions. 
+ # Recent changes +v0.2.3: + +- Add wfl.generate.neb, with required improved support for passing ConfigSet.groups() to + autoparallelized functions + +- Improved handling of old and new style ase.calculators.espresso.Espresso initialization + +v0.2.2: + +- Improve checking of DFT calculator convergence + +v0.2.1: + +- Fix group iterator + v0.2.0: - Change all wfl operations to use explicit random number generator [pull 285](https://github.com/libAtoms/workflow/pull/285), to improve reproducibility of scripts and reduce the chances that on script rerun, cached jobs will not be recognized due to uncontrolled change in random seed (as in [issue 283](https://github.com/libAtoms/workflow/issues/283) and [issue 284](https://github.com/libAtoms/workflow/issues/284)). Note that this change breaks backward compatibility because many functions now _require_ an `rng` argument, for example diff --git a/complete_pytest.tin b/complete_pytest.tin index 834b1dc6..f5e3cd2e 100755 --- a/complete_pytest.tin +++ b/complete_pytest.tin @@ -32,7 +32,7 @@ export ASE_VASP_COMMAND_GAMMA=vasp.gamma.serial export PYTEST_VASP_POTCAR_DIR=$VASP_PATH/pot/rev_54/PBE # QE module load dft/pwscf -export PYTEST_WFL_ASE_ESPRESSO_COMMAND="mpirun -np 1 pw.x" +export PYTEST_WFL_ASE_ESPRESSO_COMMAND="env MPIRUN_EXTRA_ARGS='-np 1' pw.x" # no ORCA export OPENBLAS_NUM_THREADS=1 diff --git a/docs/source/index.rst b/docs/source/index.rst index bbae0b5c..fad2e397 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,7 +21,21 @@ Quick start that installs all of the mandatory dependencies: .. code-block:: sh - pip install git+https://github.com/libAtoms/workflow + python3 -m pip install git+https://github.com/libAtoms/workflow + +.. warning:: + + `wfl` requires ASE, so `ase` is listed as a `pip` dependency, + and if not already installed, `pip install` will install the latest + `pypi` release. 
However, because of the large delay in producing new + releases, the latest `pypi` version is often quite old, and `wfl` + has some functionality that requires a newer version. To ensure + a sufficiently up-to-date version is available, before installing + `wfl` install the latest `ase` from gitlab, with a command such as + + .. code-block:: sh + + python3 -m pip install git+https://gitlab.com/ase/ase *************************************** Repository diff --git a/setup.py b/setup.py index e66f65fc..483a2c96 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ name="wfl", version="0.2.3", packages=setuptools.find_packages(exclude=["tests"]), - install_requires=["click>=7.0", "numpy", "ase>=3.21", "pyyaml", "spglib", "docstring_parser", + install_requires=["click>=7.0", "numpy", "ase>=3.22.1", "pyyaml", "spglib", "docstring_parser", "expyre-wfl @ https://github.com/libAtoms/ExPyRe/tarball/main", "universalSOAP @ https://github.com/libAtoms/universalSOAP/tarball/main"], entry_points=""" diff --git a/tests/test_configset.py b/tests/test_configset.py index 2e07788d..60eace59 100644 --- a/tests/test_configset.py +++ b/tests/test_configset.py @@ -107,6 +107,58 @@ def test_mult_files_mult_Atoms(tmp_path, ats): locs = [f" / {i0} / {i1}" for i0 in range(2) for i1 in range(5)] check_ConfigSet(cs, locs, gather_numbers([ats[0:5], ats[5:10]])) +def test_mult_files_mult_Atoms_glob_file(tmp_path, ats): + print("CHECK mult file with mult Atoms using a glob for the filename") + ase.io.write(tmp_path / "ats_0.xyz", ats[0:5]) + ase.io.write(tmp_path / "ats_1.xyz", ats[5:10]) + locs = [f" / {i0} / {i1}" for i0 in range(2) for i1 in range(5)] + + # file_root + glob in filename + cs = ConfigSet("ats_*.xyz", file_root=tmp_path) + check_ConfigSet(cs, locs, gather_numbers([ats[0:5], ats[5:10]])) + + # glob in full pathname + cs = ConfigSet(tmp_path / "ats_*.xyz") + check_ConfigSet(cs, locs, gather_numbers([ats[0:5], ats[5:10]])) + + # glob in absolute pathname + cs = 
ConfigSet(tmp_path.absolute() / "ats_*.xyz") + check_ConfigSet(cs, locs, gather_numbers([ats[0:5], ats[5:10]])) + +def test_mult_files_mult_Atoms_glob_dir(tmp_path, ats): + print("CHECK mult file with mult Atoms using a glob for directory that contains the files") + (tmp_path / "dir_0").mkdir() + (tmp_path / "dir_1").mkdir() + ase.io.write(tmp_path / "dir_0" / "ats.xyz", ats[0:5]) + ase.io.write(tmp_path / "dir_1" / "ats.xyz", ats[5:10]) + locs = [f" / {i0} / {i1}" for i0 in range(2) for i1 in range(5)] + + # glob for dir name, but same filename + cs = ConfigSet(tmp_path / "dir_*" / "ats.xyz") + check_ConfigSet(cs, locs, gather_numbers([ats[0:5], ats[5:10]])) + + # workdir with glob for dir name, but same filename + cs = ConfigSet("dir_*/ats.xyz", file_root=tmp_path) + check_ConfigSet(cs, locs, gather_numbers([ats[0:5], ats[5:10]])) + +def test_mult_files_mult_Atoms_mult_glob_dir(tmp_path, ats): + print("CHECK mult file with mult Atoms using multiple globs glob for directory that contains the files") + (tmp_path / "dir_0").mkdir() + (tmp_path / "dir_1").mkdir() + (tmp_path / "other_dir_0").mkdir() + ase.io.write(tmp_path / "dir_0" / "ats.xyz", ats[0:3]) + ase.io.write(tmp_path / "dir_1" / "ats.xyz", ats[3:6]) + ase.io.write(tmp_path / "other_dir_0" / "ats.xyz", ats[6:10]) + locs = [f" / {i0} / {i1}" for i0, i1 in [(0, 0), (0, 1), (0,2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2), (2, 3)]] + + # glob for dir name, but same filename + cs = ConfigSet([tmp_path / "dir_[01]" / "ats.xyz", tmp_path / "other_dir_*" / "ats.xyz"]) + check_ConfigSet(cs, locs, gather_numbers([ats[0:3], ats[3:6], ats[6:10]])) + + # workdir with glob for dir name, but same filename + cs = ConfigSet(["dir_[0-1]/ats.xyz", "other_dir_*/ats.xyz"], file_root=tmp_path) + check_ConfigSet(cs, locs, gather_numbers([ats[0:3], ats[3:6], ats[6:10]])) + def test_single_file_tree_Atoms(tmp_path, ats): for i in range(0, 3): ats[i].info["_ConfigSet_loc"] = f" / 0 / {i}" diff --git a/tests/test_remote_run.py 
b/tests/test_remote_run.py index e3222b21..7bb503b3 100644 --- a/tests/test_remote_run.py +++ b/tests/test_remote_run.py @@ -10,6 +10,7 @@ import ase.io from ase.atoms import Atoms +from ase.build import bulk from ase.calculators.emt import EMT import pytest @@ -21,6 +22,7 @@ from wfl.generate import optimize, md from wfl.calculators import generic from wfl.calculators.vasp import Vasp +from wfl.calculators.espresso import Espresso from wfl.autoparallelize import AutoparaInfo from expyre.func import ExPyReJobDiedError @@ -33,6 +35,14 @@ def test_generic_calc(tmp_path, expyre_systems, monkeypatch, remoteinfo_env): do_generic_calc(tmp_path, sys_name, monkeypatch, remoteinfo_env) +def test_generic_calc_qe(tmp_path, expyre_systems, monkeypatch, remoteinfo_env): + for sys_name in expyre_systems: + if sys_name.startswith('_'): + continue + + do_generic_calc_qe(tmp_path, sys_name, monkeypatch, remoteinfo_env) + + def test_minim(tmp_path, expyre_systems, monkeypatch, remoteinfo_env): for sys_name in expyre_systems: if sys_name.startswith('_'): @@ -174,6 +184,52 @@ def do_generic_calc(tmp_path, sys_name, monkeypatch, remoteinfo_env): assert dt_rerun < dt / 4.0 +# copied from calculators/test_qe.py::test_qe_calc +def do_generic_calc_qe(tmp_path, sys_name, monkeypatch, remoteinfo_env): + ri = {'sys_name': sys_name, 'job_name': 'pytest_'+sys_name, + 'resources': {'max_time': '1h', 'num_nodes': 1}, + 'num_inputs_per_queued_job': -36, 'check_interval': 10} + + qe_cmd = os.environ.get("PYTEST_WFL_ASE_ESPRESSO_COMMAND") + if qe_cmd is None: + pytest.skip("no PYTEST_WFL_ASE_ESPRESSO_COMMAND to specify executable") + pspot = tmp_path / "Si.UPF" + shutil.copy(Path(__file__).parent / "assets" / "QE" / "Si.pz-vbc.UPF", pspot) + + remoteinfo_env(ri) + print('RemoteInfo', ri) + + at = bulk("Si") + at.positions[0, 0] += 0.01 + at0 = Atoms("Si", cell=[6.0, 6.0, 6.0], positions=[[3.0, 3.0, 3.0]], pbc=False) + + kw = dict( + pseudopotentials=dict(Si=pspot.name), + input_data={"SYSTEM": 
{"ecutwfc": 40, "input_dft": "LDA",}}, + kpts=(2, 2, 2), + conv_thr=0.0001, + calculator_exec=qe_cmd, + pseudo_dir=str(pspot.parent) + ) + + calc = (Espresso, [], kw) + + # output container + c_out = OutputSpec("qe_results.xyz", file_root=tmp_path) + + results = generic.calculate( + inputs=[at0, at], + outputs=c_out, + calculator=calc, + output_prefix='QE_', + autopara_info={"remote_info": ri} + ) + + for at in results: + assert "QE_energy" in at.info + assert "QE_forces" in at.arrays + + def do_minim(tmp_path, sys_name, monkeypatch, remoteinfo_env): ri = {'sys_name': sys_name, 'job_name': 'pytest_'+sys_name, 'resources': {'max_time': '1h', 'num_nodes': 1}, diff --git a/wfl/calculators/espresso.py b/wfl/calculators/espresso.py index 595af3bf..80e8b454 100644 --- a/wfl/calculators/espresso.py +++ b/wfl/calculators/espresso.py @@ -2,6 +2,7 @@ Quantum Espresso interface """ +import os import shlex from copy import deepcopy @@ -46,7 +47,7 @@ class Espresso(WFLFileIOCalculator, ASE_Espresso): calculator_exec: str command for QE, without any prefix or redirection set. for example: "mpirun -n 4 /path/to/pw.x" - mutually exclusive with "command" + mutually exclusive with "command" with "profile" **kwargs: arguments for ase.calculators.espresso.Espresso """ @@ -61,19 +62,24 @@ def __init__(self, keep_files="default", rundir_prefix="run_QE_", calculator_exec=None, **kwargs): kwargs_command = deepcopy(kwargs) - if calculator_exec is not None: - if "command" in kwargs: - raise ValueError("Cannot specify both calculator_exec and command") - if EspressoProfile is None: - # older syntax - kwargs_command["command"] = f"{calculator_exec} -in PREFIX.pwi > PREFIX.pwo" - else: + + # check for various Espresso versions + # NOTE: should we be doing this much massaging of inputs, or should we make the user keep up + # with their ASE Espresso version? 
+ if EspressoProfile is not None: + # new version, command and ASE_ESPRESSO_COMMAND deprecated + if "command" in kwargs_command: + raise ValueError("Espresso calculator defines EspressoProfile, but deprecated 'command' arg was passed") + + if calculator_exec is not None: + # check for conflicts, wrong format + if "profile" in kwargs_command: + raise ValueError("Cannot specify both calculator_exec and profile") if " -in " in calculator_exec: raise ValueError("calculator_exec should not include espresso command line arguments such as ' -in PREFIX.pwi'") + # newer syntax, but pass binary without a keyword (which changed from "argv" to "exc" # to "binary" over time), assuming it's first argument - if "pseudo_dir" not in kwargs_command: - raise ValueError(f"calculator_exec kwargs also requires pseudo_dir to create EspressoProfile") argv = shlex.split(calculator_exec) try: kwargs_command["profile"] = EspressoProfile(argv=argv) @@ -81,8 +87,28 @@ def __init__(self, keep_files="default", rundir_prefix="run_QE_", binary, parallel_info = parse_genericfileio_profile_argv(argv) # argument names keep changing (e.g. 
pseudo_path -> pseudo_dir), just pass first two as positional # and hope order doesn't change + if "pseudo_dir" not in kwargs_command: + raise ValueError(f"calculator_exec also requires pseudo_dir to create EspressoProfile") kwargs_command["profile"] = EspressoProfile(binary, kwargs_command.pop("pseudo_dir"), parallel_info=parallel_info) + elif "profile" not in kwargs_command: + raise ValueError("EspressoProfile is defined but neither calculator_exec nor profile was specified") + + # better be defined by now + assert "profile" in kwargs_command + else: + # old (pre EspressoProfile) version + if "profile" in kwargs_command: + raise ValueError("EspressoProfile is not defined (old version) but profile was passed") + + if calculator_exec is not None: + if "command" in kwargs_command: + raise ValueError("Cannot specify both command and calculator_exec") + + kwargs_command["command"] = f"{calculator_exec} -in PREFIX.pwi > PREFIX.pwo" + + # command or env var must be set + assert "command" in kwargs_command or "ASE_ESPRESSO_COMMAND" in os.environ # WFLFileIOCalculator is a mixin, will call remaining superclass constructors for us super().__init__(keep_files=keep_files, rundir_prefix=rundir_prefix, diff --git a/wfl/calculators/generic.py b/wfl/calculators/generic.py index 17bd4df2..6a3baca5 100644 --- a/wfl/calculators/generic.py +++ b/wfl/calculators/generic.py @@ -51,7 +51,7 @@ def _run_autopara_wrappable(atoms, calculator, properties=None, output_prefix='_ except Exception as exc: # if calculator constructor failed, it may still be fine if every atoms object has # enough info to construct its own calculator, but we won't know until later - calculator_failure_message = str(exc) + calculator_failure_message = f"{exc}\n{traceback.format_exc()}" calculator_default = None if output_prefix == '_auto_': diff --git a/wfl/configset.py b/wfl/configset.py index dea6cc66..3974c611 100644 --- a/wfl/configset.py +++ b/wfl/configset.py @@ -1,6 +1,7 @@ import sys import re +import glob
from pathlib import Path @@ -81,14 +82,20 @@ def __init__(self, items, *, file_root=None, read_kwargs={}, _open_reader=None, elif isinstance(items, (str, Path)): if file_root != Path("") and Path(items).is_absolute(): raise ValueError(f"Got file_root but file {items} is an absolute path") - self.items = file_root / items + # single item, could be a simple filename or a glob. Former needs to be stored as + # Path, latter as list(Path) + items_expanded = [Path(f) for f in sorted(glob.glob(str(file_root / items), recursive=True))] + if len(items_expanded) == 1 and file_root / items == items_expanded[0]: + self.items = file_root / items + else: + self.items = items_expanded elif isinstance(items[0], (str, Path)): self.items = [] for file_path in items: assert isinstance(file_path, (str, Path)) if file_root != Path("") and Path(file_path).is_absolute(): raise ValueError(f"Got file_root but file {file_path} is an absolute path") - self.items.append(file_root / file_path) + self.items.extend([Path(f) for f in sorted(glob.glob(str(file_root / file_path), recursive=True))]) elif isinstance(items[0], ConfigSet): self.items = [] for item in items: