From e0b5b38b552028a5f03416f9a61346b7e2ea2cb8 Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Thu, 17 Oct 2024 16:22:20 +0200 Subject: [PATCH] Hq as light scheduler for docker image (#795) In the docker image, HyperQueue is pre-configured and replaces the local.direct scheduler to limit the number of CPUs used when multiple calculations run at the same time. The number of CPUs and the memory limit are read from cgroups and set as defaults for the computer. This information can later be used to set up the default amount of resources used by the QeApp. The number of CPUs is set to floor(ncpus) of the container; the goal is to leave the fractional remainder available for system tasks so that the system stays responsive. - Also include a small refactoring of the computer/code setup. --- Dockerfile | 39 +++++- .../{00_untar_home.sh => 00_untar-home.sh} | 16 ++- before-notebook.d/42_setup-hq-computer.sh | 21 +++ before-notebook.d/43_start-hq.sh | 59 ++++++++ src/aiidalab_qe/__main__.py | 11 +- src/aiidalab_qe/common/setup_codes.py | 4 +- src/aiidalab_qe/plugins/utils.py | 22 ++- src/aiidalab_qe/setup/codes.py | 130 ++++++++++++------ 8 files changed, 239 insertions(+), 63 deletions(-) rename before-notebook.d/{00_untar_home.sh => 00_untar-home.sh} (61%) create mode 100755 before-notebook.d/42_setup-hq-computer.sh create mode 100644 before-notebook.d/43_start-hq.sh diff --git a/Dockerfile b/Dockerfile index ff2ae2704..6016d1aee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,13 @@ # syntax=docker/dockerfile:1 -ARG FULL_STACK_VER=2024.1022 +ARG FULL_STACK_VER=2024.1023 ARG UV_VER=0.4.7 ARG QE_VER=7.2 ARG QE_DIR=/opt/conda/envs/quantum-espresso-${QE_VER} +ARG HQ_VER=0.19.0 ARG UV_CACHE_DIR=/tmp/uv_cache ARG QE_APP_SRC=/tmp/quantum-espresso +ARG HQ_COMPUTER="localhost-hq" FROM ghcr.io/astral-sh/uv:${UV_VER} AS uv @@ -43,22 +45,44 @@ RUN --mount=from=uv,source=/uv,target=/bin/uv \ # STAGE 3 # - Prepare AiiDA profile and localhost computer +# - Prepare hq computer using hyperqueue as
scheduler # - Install QE codes and pseudopotentials # - Archive home folder FROM build_deps AS home_build ARG QE_DIR +ARG HQ_VER +ARG HQ_COMPUTER + +# Install hq binary +RUN wget -c -O hq.tar.gz https://github.com/It4innovations/hyperqueue/releases/download/v${HQ_VER}/hq-v${HQ_VER}-linux-x64.tar.gz && \ + tar xf hq.tar.gz -C /opt/conda/ + ENV PSEUDO_FOLDER=/tmp/pseudo RUN mkdir -p ${PSEUDO_FOLDER} && \ python -m aiidalab_qe download-pseudos --dest ${PSEUDO_FOLDER} +ENV UV_CONSTRAINT=${PIP_CONSTRAINT} +# Install the aiida-hyperqueue +# XXX: fix me after release aiida-hyperqueue +RUN --mount=from=uv,source=/uv,target=/bin/uv \ + --mount=from=build_deps,source=${UV_CACHE_DIR},target=${UV_CACHE_DIR},rw \ + uv pip install --system --strict --cache-dir=${UV_CACHE_DIR} \ + "aiida-hyperqueue@git+https://github.com/aiidateam/aiida-hyperqueue" + +COPY ./before-notebook.d/* /usr/local/bin/before-notebook.d/ + +ENV HQ_COMPUTER=$HQ_COMPUTER + # TODO: Remove PGSQL and daemon log files, and other unneeded files RUN --mount=from=qe_conda_env,source=${QE_DIR},target=${QE_DIR} \ bash /usr/local/bin/before-notebook.d/20_start-postgresql.sh && \ bash /usr/local/bin/before-notebook.d/40_prepare-aiida.sh && \ - python -m aiidalab_qe install-qe && \ + bash /usr/local/bin/before-notebook.d/42_setup-hq-computer.sh && \ + python -m aiidalab_qe install-qe --computer ${HQ_COMPUTER} && \ python -m aiidalab_qe install-pseudos --source ${PSEUDO_FOLDER} && \ verdi daemon stop && \ mamba run -n aiida-core-services pg_ctl stop && \ + touch /home/${NB_USER}/.FLAG_HOME_INITIALIZED && \ cd /home/${NB_USER} && tar -cf /opt/conda/home.tar . 
# STAGE 3 - Final stage @@ -71,22 +95,31 @@ FROM ghcr.io/aiidalab/full-stack:${FULL_STACK_VER} ARG QE_DIR ARG QE_APP_SRC ARG UV_CACHE_DIR +ARG HQ_COMPUTER USER ${NB_USER} WORKDIR /tmp # Install python dependencies # Use uv cache from the previous build step +# # Install the aiida-hyperqueue +# # XXX: fix me after release aiida-hyperqueue ENV UV_CONSTRAINT=${PIP_CONSTRAINT} RUN --mount=from=uv,source=/uv,target=/bin/uv \ --mount=from=build_deps,source=${UV_CACHE_DIR},target=${UV_CACHE_DIR},rw \ --mount=from=build_deps,source=${QE_APP_SRC},target=${QE_APP_SRC},rw \ - uv pip install --strict --system --compile-bytecode --cache-dir=${UV_CACHE_DIR} ${QE_APP_SRC} + uv pip install --strict --system --compile-bytecode --cache-dir=${UV_CACHE_DIR} ${QE_APP_SRC} "aiida-hyperqueue@git+https://github.com/aiidateam/aiida-hyperqueue" + +# copy hq binary +COPY --from=home_build /opt/conda/hq /usr/local/bin/ COPY --from=qe_conda_env ${QE_DIR} ${QE_DIR} USER root + COPY ./before-notebook.d/* /usr/local/bin/before-notebook.d/ +ENV HQ_COMPUTER=$HQ_COMPUTER + # Remove content of $HOME # '-mindepth=1' ensures that we do not remove the home directory itself. RUN find /home/${NB_USER}/ -mindepth 1 -delete diff --git a/before-notebook.d/00_untar_home.sh b/before-notebook.d/00_untar-home.sh similarity index 61% rename from before-notebook.d/00_untar_home.sh rename to before-notebook.d/00_untar-home.sh index d911474e8..d55902280 100644 --- a/before-notebook.d/00_untar_home.sh +++ b/before-notebook.d/00_untar-home.sh @@ -4,7 +4,7 @@ set -eux home="/home/${NB_USER}" # Untar home archive file to restore home directory if it is empty -if [[ $(ls -A ${home} | wc -l) = "0" ]]; then +if [ ! -e $home/.FLAG_HOME_INITIALIZED ]; then if [[ ! -f $HOME_TAR ]]; then echo "File $HOME_TAR does not exist!" 
exit 1 @@ -15,12 +15,20 @@ if [[ $(ls -A ${home} | wc -l) = "0" ]]; then fi echo "Extracting $HOME_TAR to $home" + # NOTE: a tar error when deployed to k8s but at the momment not cause any issue + # tar: .: Cannot utime: Operation not permitted + # tar: .: Cannot change mode to rwxr-s---: Operation not permitted tar -xf $HOME_TAR -C "$home" - - echo "Copying directory '$QE_APP_FOLDER' to '$AIIDALAB_APPS'" - cp -r "$QE_APP_FOLDER" "$AIIDALAB_APPS" else echo "$home folder is not empty!" ls -lrta "$home" fi + +if [ -d $AIIDALAB_APPS/quantum-espresso ]; then + echo "Quantum ESPRESSO app does exist" +else + echo "Copying directory '$QE_APP_FOLDER' to '$AIIDALAB_APPS'" + cp -r "$QE_APP_FOLDER" "$AIIDALAB_APPS" +fi + set +eux diff --git a/before-notebook.d/42_setup-hq-computer.sh b/before-notebook.d/42_setup-hq-computer.sh new file mode 100755 index 000000000..7031ced00 --- /dev/null +++ b/before-notebook.d/42_setup-hq-computer.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -x + +# computer +verdi computer show ${HQ_COMPUTER} || verdi computer setup \ + --non-interactive \ + --label "${HQ_COMPUTER}" \ + --description "local computer with hyperqueue scheduler" \ + --hostname "localhost" \ + --transport core.local \ + --scheduler hyperqueue \ + --work-dir /home/${NB_USER}/aiida_run/ \ + --mpirun-command "mpirun -np {num_cpus}" + +verdi computer configure core.local "${HQ_COMPUTER}" \ + --non-interactive \ + --safe-interval 5.0 + +# disable the localhost which is set in base image +verdi computer disable localhost aiida@localhost diff --git a/before-notebook.d/43_start-hq.sh b/before-notebook.d/43_start-hq.sh new file mode 100644 index 000000000..c20a462e4 --- /dev/null +++ b/before-notebook.d/43_start-hq.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +set -x + +# NOTE: this cgroup folder hierachy is based on cgroupv2 +# if the container is open in system which has cgroupv1 the image build procedure will fail. 
+# Since the image is mostly for demo server where we know the machine and OS I supposed +# it should have cgroupv2 (> Kubernetes v1.25). +# We only build the server for demo server so it does not require user to have new cgroup. +# But for developers, please update your cgroup version to v2. +# See: https://kubernetes.io/docs/concepts/architecture/cgroups/#using-cgroupv2 + +# computer memory from runtime +MEMORY_LIMIT=$(cat /sys/fs/cgroup/memory.max) + +if [ "$MEMORY_LIMIT" = "max" ]; then + MEMORY_LIMIT=4096 + echo "No memory limit set, use 4GiB" +else + MEMORY_LIMIT=$(echo "scale=0; $MEMORY_LIMIT / (1024 * 1024)" | bc) + echo "Memory Limit: ${MEMORY_LIMIT} MiB" +fi + +# Compute number of cpus allocated to the container +CPU_LIMIT=$(awk '{print $1}' /sys/fs/cgroup/cpu.max) +CPU_PERIOD=$(awk '{print $2}' /sys/fs/cgroup/cpu.max) + +if [ "$CPU_PERIOD" -ne 0 ]; then + CPU_NUMBER=$(echo "scale=2; $CPU_LIMIT / $CPU_PERIOD" | bc) + echo "Number of CPUs allocated: $CPU_NUMBER" + + # for HQ setting round to integer number of CPUs, the left are for system tasks + CPU_LIMIT=$(echo "scale=0; $CPU_LIMIT / $CPU_PERIOD" | bc) +else + # if no limit (with local OCI without setting cpu limit, use all CPUs) + CPU_LIMIT=$(nproc) + echo "No CPU limit set" +fi + +# Start hq server with a worker +run-one-constantly hq server start 1>$HOME/.hq-stdout 2>$HOME/.hq-stderr & +run-one-constantly hq worker start --cpus=${CPU_LIMIT} --resource "mem=sum(${MEMORY_LIMIT})" --no-detect-resources & + +# Reset the default memory_per_machine and default_mpiprocs_per_machine +# c.set_default_mpiprocs_per_machine = ${CPU_LIMIT} +# c.set_default_memery_per_machine = ${MEMORY_LIMIT} + +# Same as original localhost set job poll interval to 2.0 secs +# In addition, set default mpiprocs and memor per machine +# TODO: this will be run every time the container start, we need a lock file to prevent it. 
+job_poll_interval="2.0" +computer_name=${HQ_COMPUTER} +python -c " +from aiida import load_profile; from aiida.orm import load_computer; +load_profile(); +load_computer('${computer_name}').set_minimum_job_poll_interval(${job_poll_interval}) +load_computer('${computer_name}').set_default_mpiprocs_per_machine(${CPU_LIMIT}) +load_computer('${computer_name}').set_default_memory_per_machine(${MEMORY_LIMIT}) +" diff --git a/src/aiidalab_qe/__main__.py b/src/aiidalab_qe/__main__.py index 1e5d5043a..2a81f56d0 100644 --- a/src/aiidalab_qe/__main__.py +++ b/src/aiidalab_qe/__main__.py @@ -16,19 +16,20 @@ def cli(): @cli.command() @click.option("-f", "--force", is_flag=True) +@click.option("--computer") @click.option("-p", "--profile", default=_DEFAULT_PROFILE) -def install_qe(force, profile): +def install_qe(force, profile, computer): from aiida import load_profile - from aiidalab_qe.setup.codes import codes_are_setup, install + from aiidalab_qe.setup.codes import codes_are_setup, install_and_setup load_profile(profile) try: - for msg in install(force=force): + for msg in install_and_setup(computer=computer, force=force): click.echo(msg) - assert codes_are_setup() + assert codes_are_setup(computer=computer) click.secho("Codes are setup!", fg="green") except Exception as error: - raise click.ClickException(f"Failed to set up QE failed: {error}") from error + raise click.ClickException(f"Failed to set up QE: {error}") from error @cli.command() diff --git a/src/aiidalab_qe/common/setup_codes.py b/src/aiidalab_qe/common/setup_codes.py index 32cae7ede..99aa4e5ec 100644 --- a/src/aiidalab_qe/common/setup_codes.py +++ b/src/aiidalab_qe/common/setup_codes.py @@ -4,7 +4,7 @@ import ipywidgets as ipw import traitlets -from ..setup.codes import QE_VERSION, install +from ..setup.codes import QE_VERSION, install_and_setup from .widgets import ProgressBar __all__ = [ @@ -66,7 +66,7 @@ def _refresh_installed(self): try: self.set_trait("busy", True) - for msg in install(): + for msg in 
install_and_setup(): self.set_message(msg) except Exception as error: diff --git a/src/aiidalab_qe/plugins/utils.py b/src/aiidalab_qe/plugins/utils.py index 8aa602ac5..a1d6ee841 100644 --- a/src/aiidalab_qe/plugins/utils.py +++ b/src/aiidalab_qe/plugins/utils.py @@ -3,12 +3,22 @@ def set_component_resources(component, code_info): """Set the resources for a given component based on the code info.""" - if code_info: # Ensure code_info is not None or empty - component.metadata.options.resources = { - "num_machines": code_info["nodes"], - "num_mpiprocs_per_machine": code_info["ntasks_per_node"], - "num_cores_per_mpiproc": code_info["cpus_per_task"], - } + if code_info: # Ensure code_info is not None or empty (# XXX: ? from jyu, need to pop a warning to plugin developer or what?) + code: orm.Code = code_info["code"] + if code.computer.scheduler_type == "hyperqueue": + component.metadata.options.resources = { + "num_cpus": code_info["nodes"] + * code_info["ntasks_per_node"] + * code_info["cpus_per_task"] + } + else: + # XXX: jyu should properly deal with None type of scheduler_type which can be "core.direct" (will be replaced by hyperqueue) and "core.slurm" ... 
+ component.metadata.options.resources = { + "num_machines": code_info["nodes"], + "num_mpiprocs_per_machine": code_info["ntasks_per_node"], + "num_cores_per_mpiproc": code_info["cpus_per_task"], + } + component.metadata.options["max_wallclock_seconds"] = code_info[ "max_wallclock_seconds" ] diff --git a/src/aiidalab_qe/setup/codes.py b/src/aiidalab_qe/setup/codes.py index 21eba487e..c63a76bb7 100644 --- a/src/aiidalab_qe/setup/codes.py +++ b/src/aiidalab_qe/setup/codes.py @@ -1,13 +1,14 @@ +import subprocess from pathlib import Path from shutil import which -from subprocess import CalledProcessError, run from filelock import FileLock, Timeout from aiida.common.exceptions import NotExistent from aiida.orm import load_code -FN_LOCKFILE = Path.home().joinpath(".install-qe-on-localhost.lock") +FN_INSTALL_LOCKFILE = Path.home().joinpath(".install-qe-on-localhost.lock") +FN_SETUP_LOCKFILE = Path.home().joinpath(".setup-qe-on-localhost.lock") FN_DO_NOT_SETUP = Path.cwd().joinpath(".do-not-setup-on-localhost") QE_VERSION = "7.2" @@ -43,11 +44,22 @@ def get_qe_env(): def qe_installed(): - return get_qe_env().exists() + import json + + env_exist = get_qe_env().exists() + proc = subprocess.run( + ["conda", "list", "-n", f"{get_qe_env().name}", "--json", "--full-name", "qe"], + check=True, + capture_output=True, + ) + + info = json.loads(str(proc.stdout.decode()))[0] + + return env_exist and "qe" == info["name"] def install_qe(): - run( + subprocess.run( [ "conda", "create", @@ -64,17 +76,17 @@ def install_qe(): ) -def _code_is_setup(name): +def _code_is_setup(name, computer): try: - load_code(f"{name}-{QE_VERSION}@localhost") + load_code(f"{name}-{QE_VERSION}@{computer}") except NotExistent: return False else: return True -def codes_are_setup(): - return all(_code_is_setup(code_name) for code_name in CODE_NAMES) +def codes_are_setup(computer): + return all(_code_is_setup(code_name, computer) for code_name in CODE_NAMES) def _generate_header_to_setup_code(): @@ -89,13 +101,13 
@@ def _generate_header_to_setup_code(): return header_code -def _generate_string_to_setup_code(code_name, computer_name="localhost"): +def _generate_string_to_setup_code(code_name, computer): """Generate the Python string to setup an AiiDA code for a given computer. Tries to load an existing code and if not existent, generates Python code to create and store a new code setup.""" try: - load_code(f"{code_name}-{QE_VERSION}@{computer_name}") + load_code(f"{code_name}-{QE_VERSION}@{computer}") except NotExistent: label = f"{code_name}-{QE_VERSION}" description = f"{code_name}.x ({QE_VERSION}) setup by AiiDAlab." @@ -114,7 +126,7 @@ def _generate_string_to_setup_code(code_name, computer_name="localhost"): code.store() """.format( # noqa: UP032 - computer_name, + computer, label, description, filepath_executable, @@ -127,77 +139,109 @@ def _generate_string_to_setup_code(code_name, computer_name="localhost"): return "" -def setup_codes(): +def setup_codes(computer): python_code = _generate_header_to_setup_code() for code_name in CODE_NAMES: - python_code += _generate_string_to_setup_code(code_name) + python_code += _generate_string_to_setup_code(code_name, computer) try: - run(["python", "-c", python_code], capture_output=True, check=True) - except CalledProcessError as error: - raise RuntimeError(f"Failed to setup codes: {error}") from None + subprocess.run(["python", "-c", python_code], capture_output=True, check=True) + except subprocess.CalledProcessError as err: + raise RuntimeError( + f"Failed to setup codes, exit_code={err.returncode}, {err.stderr}" + ) from None -def install(force=False): +def install_and_setup(computer="localhost", force=False): """Install Quantum ESPRESSO and the corresponding AiiDA codes. Args: force: Ignore previously failed attempts and install anyways. + computer: computer label in AiiDA where the code is setup for """ # Check for "do not install file" and skip actual check. 
The purpose of # this file is to not re-try this process on every app start in case that # there are issues. + # XXX: use filelock to control `FN_DO_NOT_SETUP` as well if not force and FN_DO_NOT_SETUP.exists(): raise RuntimeError("Installation failed in previous attempt.") + yield from _install() + yield from _setup(computer) + + +def _install(): + """Install Quantum ESPRESSO.""" yield "Checking installation status..." conda_installed = which("conda") try: - with FileLock(FN_LOCKFILE, timeout=5): - # We assume that if the codes are already setup, everything is in - # order. Only if they are not present, should we take action, - # however we only do so if the environment has a conda binary - # present (`which conda`). If that is not the case then we assume - # that this is a custom user environment in which case we also take - # no further action. - if codes_are_setup(): - return # Already setup - + with FileLock(FN_INSTALL_LOCKFILE, timeout=5): if not conda_installed: raise RuntimeError( "Unable to automatically install Quantum ESPRESSO, conda " "is not available." ) + if qe_installed(): + return + + # Install Quantum ESPRESSO. + yield "Installing QE..." + try: + install_qe() + except subprocess.CalledProcessError as error: + raise RuntimeError( + f"Failed to create conda environment: {error}" + ) from None + + except Timeout: + # Assume that the installation was triggered by a different process. + yield "Installation was already started, waiting for it to finish..." + with FileLock(FN_INSTALL_LOCKFILE, timeout=120): if not qe_installed(): - # First, install Quantum ESPRESSO. - yield "Installing QE..." - try: - install_qe() - except CalledProcessError as error: - raise RuntimeError( - f"Failed to create conda environment: {error}" - ) from None + raise RuntimeError( + "Installation process did not finish in the expected time." 
+ ) from None + + +def _setup(computer): + """Setup the corresponding AiiDA codes after QE installation.""" + yield "Checking setup status..." + + try: + with FileLock(FN_SETUP_LOCKFILE, timeout=5): + # We assume that if the codes are already setup, everything is in + # order. Only if they are not present, should we take action, + # however we only do so if the environment has a conda binary + # present (`which conda`). If that is not the case then we assume + # that this is a custom user environment in which case we also take + # no further action. + if codes_are_setup(computer=computer): + return # Already setup # After installing QE, we install the corresponding # AiiDA codes: python_code = _generate_header_to_setup_code() for code_name in CODE_NAMES: - if not _code_is_setup(code_name): - yield f"Preparing setup script for ({code_name})..." - code_string = _generate_string_to_setup_code(code_name) + if not _code_is_setup(code_name, computer=computer): + yield f"Preparing setup script for ({code_name}) on ({computer})..." + code_string = _generate_string_to_setup_code(code_name, computer) python_code += code_string try: yield "Setting up all codes..." - run(["python", "-c", python_code], capture_output=True, check=True) - except CalledProcessError as error: - raise RuntimeError(f"Failed to setup codes: {error}") from None + subprocess.run( + ["python", "-c", python_code], capture_output=True, check=True + ) + except subprocess.CalledProcessError as err: + raise RuntimeError( + f"Failed to setup codes, exit_code={err.returncode}, {err.stderr}" + ) from None except Timeout: # Assume that the installation was triggered by a different process. yield "Installation was already started, waiting for it to finish..." - with FileLock(FN_LOCKFILE, timeout=120): - if not codes_are_setup(): + with FileLock(FN_SETUP_LOCKFILE, timeout=120): + if not codes_are_setup(computer=computer): raise RuntimeError( "Installation process did not finish in the expected time." ) from None