Skip to content

Commit

Permalink
WIP: more CLI to control the remote
Browse files Browse the repository at this point in the history
- Change command to aiida-hq
- add aiida-hq install <computer>
- [ ] add tests
- [ ] start server
- [ ] pre-commit lint
  • Loading branch information
unkcpz committed Jun 5, 2024
1 parent fd11895 commit 5b56629
Show file tree
Hide file tree
Showing 21 changed files with 501 additions and 112 deletions.
87 changes: 87 additions & 0 deletions -
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import click

from aiida.cmdline.utils import echo

from .root import cmd_root
from .params import arguments

# ``server`` sub-group of the root CLI; the start/stop/restart/info commands
# defined in this module are registered on it.
@cmd_root.group("server")
def server_group():
    """Commands for interacting with the HQ server."""


@server_group.command("start")
@arguments.COMPUTER()
def cmd_start(computer):
    """Start the HyperQueue server."""
    # Probe the remote first: a zero exit code from ``hq server info`` is
    # taken to mean a server is already up, in which case nothing is launched.
    with computer.get_transport() as probe_transport:
        probe_code, _, _ = probe_transport.exec_command_wait("hq server info")

    if probe_code == 0:
        echo.echo_info("server is already running!")
        return

    # Launch in the background, redirecting output to files in the remote $HOME.
    launch_command = "nohup hq server start 1>$HOME/.hq-stdout 2>$HOME/.hq-stderr &"

    with computer.get_transport() as launch_transport:
        # FIXME: a short sleep is needed after the nohup,
        # see https://github.com/aiidateam/aiida-core/issues/6377
        # but the sleep workaround is incorrect, since sleep always returns 0.
        # The timeout used here relates to https://github.com/aiidateam/aiida-core/pull/6452
        launch_code, _, launch_stderr = launch_transport.exec_command_wait(
            launch_command,
            timeout=0.1,
        )

    if launch_code != 0:
        echo.echo_critical(f"unable to start the server: {launch_stderr}")

    echo.echo_success("HQ server started!")

@server_group.command("stop")
@arguments.COMPUTER()
def cmd_stop(computer):
    """Stop the HyperQueue server."""
    # ``computer`` is the AiiDA computer whose transport is used to run the
    # ``hq`` commands remotely.

    # A non-zero exit code from ``hq server info`` is taken to mean that no
    # server is running, so there is nothing to stop.
    with computer.get_transport() as transport:
        retval, _, _ = transport.exec_command_wait("hq server info")

    if retval != 0:
        echo.echo_info("server is not running!")
        return

    # Warn the user about the side effect before actually stopping.
    echo.echo_info("Stop the hq server will close all allocs.")

    with computer.get_transport() as transport:
        retval, _, stderr = transport.exec_command_wait("hq server stop")

    if retval != 0:
        # Reports the remote stderr as a critical error.
        echo.echo_critical(f"unable to stop the server: {stderr}")

    echo.echo_success("HQ server stopped!")

@server_group.command("restart")
@arguments.COMPUTER()
@click.pass_context
def cmd_restart(ctx, computer):
    """Restart the HyperQueue server by stop and start again"""
    # ``ctx.invoke`` does not forward this command's parsed parameters to the
    # invoked command; anything not passed explicitly falls back to that
    # parameter's default. ``computer`` must therefore be handed over by
    # keyword, otherwise the sub-commands would receive ``None``.
    ctx.invoke(cmd_stop, computer=computer)
    ctx.invoke(cmd_start, computer=computer)


@server_group.command("info")
@arguments.COMPUTER()
def cmd_info(computer):
    """Get information on the HyperQueue server."""
    # ``computer`` is the AiiDA computer whose transport is used to run
    # ``hq server info`` remotely; its stdout is echoed on success.
    with computer.get_transport() as transport:
        retval, stdout, stderr = transport.exec_command_wait("hq server info")

    if retval != 0:
        # The hint must name this plugin's CLI entry point (``aiida-hq``).
        echo.echo_critical(
            f"cannot obtain HyperQueue server information: {stderr}\n"
            "Try starting the server with `aiida-hq server start`."
        )

    echo.echo(stdout)
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ AiiDA plugin for the [HyperQueue](https://github.com/It4innovations/hyperqueue)

Allows task farming on Slurm machines through the submission of AiiDA calculations to the [HyperQueue](https://github.com/It4innovations/hyperqueue) metascheduler.
See the [Documentation](http://aiida-hyperqueue.readthedocs.io/) for more information on how to install and use the plugin.

## For developers

To control the log level of the commands: since we use the `echo` module from AiiDA, the CLI log level can be set through `logging.verdi_loglevel`.
8 changes: 8 additions & 0 deletions aiida_hyperqueue/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
from aiida.cmdline.params import options as core_options
from aiida.cmdline.params import types as core_types

from .root import cmd_root
from .install import cmd_install
from .server import cmd_info, cmd_start, cmd_stop
from .alloc import cmd_list, cmd_add, cmd_remove
74 changes: 9 additions & 65 deletions aiida_hyperqueue/cli.py → aiida_hyperqueue/cli/alloc.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,11 @@
# -*- coding: utf-8 -*-
"""Command line interface (CLI) for aiida_hyperqueue."""

import click
from aiida.cmdline.params import options, arguments
from aiida.cmdline.utils import decorators, echo
from aiida.cmdline.commands.cmd_data import verdi_data


@verdi_data.group("hyperqueue")
def data_cli():
"""Command line interface for aiida-hyperqueue"""

from aiida.cmdline.params import options, arguments
from aiida.cmdline.utils import echo

@data_cli.group("server")
def server_group():
"""Commands for interacting with the HQ server."""


@server_group.command("start")
@arguments.COMPUTER()
@decorators.with_dbenv()
def start_cmd(computer):
"""Start the HyperQueue server."""

with computer.get_transport() as transport:
retval, _, _ = transport.exec_command_wait("hq server info")

if retval == 0:
echo.echo_info("server is already running!")
return

with computer.get_transport() as transport:
retval, _, stderr = transport.exec_command_wait(
"nohup hq server start 1>$HOME/.hq-stdout 2>$HOME/.hq-stderr &"
)

if retval != 0:
echo.echo_critical(f"unable to start the server: {stderr}")

echo.echo_success("HQ server started!")


@server_group.command("info")
@arguments.COMPUTER()
@decorators.with_dbenv()
def info_cmd(computer):
"""Get information on the HyperQueue server."""

with computer.get_transport() as transport:
retval, stdout, stderr = transport.exec_command_wait("hq server info")

if retval != 0:
echo.echo_critical(
f"cannot obtain HyperQueue server information: {stderr}\n"
"Try starting the server with `verdi data hyperqueue server start`."
)

echo.echo(stdout)

from .root import cmd_root

@data_cli.group("alloc")
@cmd_root.group("alloc")
def alloc_group():
"""Commands to configure HQ allocations."""

Expand Down Expand Up @@ -102,13 +48,13 @@ def alloc_group():
default=1,
help=("Option to allow pooled jobs to launch on multiple nodes."),
)
@decorators.with_dbenv()
def add_cmd(
def cmd_add(
slurm_options, computer, time_limit, hyper_threading, backlog, workers_per_alloc
):
"""Add a new allocation to the HQ server."""

hyper = "" if hyper_threading else "--cpus no-ht"
# from hq==0.13.0: ``--cpus=no-ht`` is now changed to a flag ``--no-hyper-threading``
hyper = "" if hyper_threading else "--no-hyper-threading"

with computer.get_transport() as transport:
retval, _, stderr = transport.exec_command_wait(
Expand All @@ -124,8 +70,7 @@ def add_cmd(

@alloc_group.command("list")
@arguments.COMPUTER()
@decorators.with_dbenv()
def list_cmd(computer):
def cmd_list(computer):
"""List the allocations on the HQ server."""

with computer.get_transport() as transport:
Expand All @@ -140,8 +85,7 @@ def list_cmd(computer):
@alloc_group.command("remove")
@click.argument("alloc_id")
@options.COMPUTER(required=True)
@decorators.with_dbenv()
def remove_cmd(alloc_id, computer):
def cmd_remove(alloc_id, computer):
"""Remove an allocation from the HQ server."""

with computer.get_transport() as transport:
Expand Down
109 changes: 109 additions & 0 deletions aiida_hyperqueue/cli/install.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-
import click
import tempfile
import requests
import tarfile
from pathlib import Path

from aiida import orm
from aiida.cmdline.utils import echo

from .params import arguments
from .root import cmd_root


@cmd_root.command("install")
@arguments.COMPUTER()
@click.option(
    "-p",
    "--remote-bin-dir",
    type=click.Path(),
    default=Path("$HOME/bin/"),
    help="remote bin path hq will stored.",
)
@click.option(
    "--hq-version", type=str, default="0.19.0", help="the hq version will be installed."
)
# TODO: separate the bashrc write and make it optional.
# TODO: should also support different arch binary??
def cmd_install(computer: orm.Computer, remote_bin_dir: Path, hq_version: str):
    """Install the hq binary to the computer through the transport"""
    # Steps: validate the requested version, download and unpack the release
    # tarball locally, upload the ``hq`` binary through the computer's
    # transport, make it executable, and append a PATH export to the remote
    # ``~/.bashrc`` (once, guarded by a marker comment).
    #
    # computer: AiiDA computer providing the transport to the remote machine.
    # remote_bin_dir: remote directory for the binary; may contain shell
    #     variables such as ``$HOME``, which are expanded on the remote.
    # hq_version: release version to download, expected as ``X.Y.Z``.

    # The minimal hq version we support is 0.13.0, check the minor version.
    # The integer conversion lives inside the ``try`` so that a non-numeric
    # component is reported like any other unparsable version.
    try:
        _, minor_str, _ = hq_version.split(".")
        minor = int(minor_str)
    except ValueError as e:
        echo.echo_critical(f"Cannot parse the version {hq_version}: {e}")
    else:
        if minor < 13:
            # `--no-hyper-threading` replace `--cpus=no-ht` from 0.13.0
            # If older version installed, try to not use `--no-hyper-threading` for `aiida-hq alloc add`.
            echo.echo_warning(
                f"You are installing hq version {hq_version}, please do not use `--no-hyper-threading` for `aiida-hq alloc add`."
                " Or install version >= 0.13.0"
            )

    # Download the hq binary with specific version to local temp folder,
    # abort if the version is not found, then upload to the remote using the
    # opened transport of the computer. Everything that needs the local binary
    # stays inside this ``with`` because the directory is removed on exit.
    with tempfile.TemporaryDirectory() as temp_dir:
        work_dir = Path(temp_dir)
        tar_path = work_dir / "hq.tar.gz"
        url = f"https://github.com/It4innovations/hyperqueue/releases/download/v{hq_version}/hq-v{hq_version}-linux-x64.tar.gz"

        # The context manager makes sure the streamed connection is released.
        with requests.get(url, stream=True) as response:
            if response.status_code != 200:
                # Must be fatal: otherwise the error page would be written to
                # disk and handed to ``tarfile`` below.
                echo.echo_critical(
                    "Cannot download the hq, please check the version is exist."
                )

            with open(tar_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        with tarfile.open(tar_path, "r") as tar:
            tar.extractall(path=work_dir)

        echo.echo_success(f"The hq version {hq_version} binary downloaded.")

        bin_path = work_dir / "hq"

        # upload the binary to remote
        # TODO: try not override if the binary exist, put has overwrite=True as default
        with computer.get_transport() as transport:
            # Let the remote shell expand variables to get the absolute path
            # of the remote bin dir.
            retval, stdout, stderr = transport.exec_command_wait(
                f"echo {str(remote_bin_dir)}"
            )
            if retval != 0:
                echo.echo_critical(
                    f"Not able to parse remote bin dir {remote_bin_dir}, exit_code={retval}"
                )
            else:
                remote_bin_dir = Path(stdout.strip())

            # first check if the hq exist in the target folder
            if transport.isfile(str(remote_bin_dir / "hq")):
                echo.echo_info(
                    f"hq exist in the {remote_bin_dir} on remote, will override it."
                )

            transport.makedirs(path=str(remote_bin_dir), ignore_existing=True)
            transport.put(
                localpath=str(bin_path.resolve()), remotepath=str(remote_bin_dir)
            )

            # XXX: should transport.put take care of this already??
            transport.exec_command_wait(f"chmod +x {str(remote_bin_dir / 'hq')}")

            # write to bashrc; the marker comment prevents appending twice.
            # NOTE(review): the exported path is hard-coded to $HOME/bin and
            # does not follow a custom --remote-bin-dir -- confirm intent.
            identity_str = "by aiida-hq"
            retval, _, stderr = transport.exec_command_wait(
                f"grep -q '# {identity_str}' ~/.bashrc || echo '# {identity_str}\nexport PATH=$HOME/bin:$PATH' >> ~/.bashrc"
            )

            if retval != 0:
                echo.echo_critical(
                    f"Not able to set set the path $HOME/bin to your remote bashrc, try to do it manually.\n"
                    f"Info: {stderr}"
                )

    echo.echo_success("The hq binary installed in remote")
Empty file.
4 changes: 4 additions & 0 deletions aiida_hyperqueue/cli/params/arguments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
from aiida.cmdline.params import arguments as core_arguments

COMPUTER = core_arguments.COMPUTER
33 changes: 33 additions & 0 deletions aiida_hyperqueue/cli/params/options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""Reusable options for CLI commands."""

import functools

import click
from aiida.cmdline.params import options as core_options
from aiida.cmdline.params import types as core_types

# Public option objects exported by this module.
__all__ = (
    "PROFILE",
    "VERBOSITY",
    "VERSION",
)

# Pre-configured variant of the ``aiida-core`` ``PROFILE`` option: the param
# type is built with ``load_profile=True`` and the value is not passed on to
# the command callback (``expose_value=False``).
PROFILE = functools.partial(
    core_options.PROFILE,
    type=core_types.ProfileParamType(load_profile=True),
    expose_value=False,
)

# Clone the ``VERBOSITY`` option from ``aiida-core`` so the ``-v`` short flag can be removed, since that overlaps with
# the flag of the ``VERSION`` option of this CLI.
VERBOSITY = core_options.VERBOSITY.clone()
VERBOSITY.args = ("--verbosity",)

# Optional string option ``-v/--version``; this is the option that claims the
# ``-v`` short flag mentioned above.
VERSION = core_options.OverridableOption(
    "-v",
    "--version",
    type=click.STRING,
    required=False,
    help="Select the version of the installed configuration.",
)
37 changes: 37 additions & 0 deletions aiida_hyperqueue/cli/root.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
"""Command line interface `aiida-hq` for aiida-hyperqueue.
The CLI implementation prototype from `aiida-pseudo`.
"""

import click

from aiida.cmdline.groups.verdi import VerdiCommandGroup

from .params import options


class CustomVerdiCommandGroup(VerdiCommandGroup):
    """Command group for this CLI, derived from :class:`aiida.cmdline.groups.verdi.VerdiCommandGroup`.

    The verbosity option is replaced by a custom one without the ``-v`` short flag, because ``-v`` is used by other
    options in this CLI and would otherwise clash.
    """

    @staticmethod
    def add_verbosity_option(cmd):
        """Return ``cmd`` with the custom ``verbosity`` option attached, unless it is ``None`` or already has one."""
        if cmd is None:
            return cmd

        # Do not decorate twice: leave commands that already carry the option.
        if any(param.name == "verbosity" for param in cmd.params):
            return cmd

        return options.VERBOSITY()(cmd)


# Root click group named ``aiida-hq``. It uses ``CustomVerdiCommandGroup`` as
# group class, accepts both ``-h`` and ``--help`` for help, and carries the
# verbosity and profile options from the local ``options`` module.
@click.group(
    "aiida-hq",
    cls=CustomVerdiCommandGroup,
    context_settings={"help_option_names": ["-h", "--help"]},
)
@options.VERBOSITY()
@options.PROFILE()
def cmd_root():
    """CLI for the ``aiida-hyperqueue`` plugin."""
Loading

0 comments on commit 5b56629

Please sign in to comment.