Skip to content

Commit

Permalink
Signal handling
Browse files (browse the repository at this point in the history)
  • Loading branch information
maouw committed Sep 28, 2023
1 parent dd719e1 commit cb6da91
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 27 deletions.
45 changes: 24 additions & 21 deletions hyakvnc/__main__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,7 +32,7 @@
app_job_ids = []


def cmd_create(container_path: Union[str, Path], dry_run=False) -> Union[HyakVncSession, None]:
def cmd_create(container_path: Union[str, Path], dry_run=False):
"""
Allocates a compute node, starts a container, and launches a VNC session on it.
:param container_path: Path to container to run
Expand Down Expand Up @@ -172,30 +172,34 @@ def create_node_signal_handler(signal_number, frame):
logger.info("Waiting for Apptainer instance to start running")
if wait_for_file(str(instance_file), timeout=app_config.sbatch_post_timeout):
time.sleep(10) # sleep to wait for apptainer to actually start vncserver
try:
sessions = HyakVncSession.find_running_sessions(app_config, job_id=job_id)
if len(sessions) == 0:
logger.warning("No running VNC jobs found")
kill_self()
sessions = [s for s in sessions if s.job_id == job_id]
if len(sessions) == 0:
logger.warning("No running VNC jobs found")
kill_self()
sesh = sessions[0]
except (ValueError, FileNotFoundError) as e:
logger.error(f"Could not load instance file: {instance_file} due to error: {e}")

def get_session():
try:
sessions = HyakVncSession.find_running_sessions(app_config, job_id=job_id)
if sessions:
my_sessions = [s for s in sessions if s.job_id == job_id]
if my_sessions:
return my_sessions[0]
except LookupError as e:
logger.debug(f"Could not get session info for job {job_id}: {e}")
return None

sesh = repeat_until(
lambda x: get_session(), lambda x: x is not None, timeout=app_config.sbatch_post_timeout * 2
)
if not sesh:
logger.warning(f"No running VNC sessions found for job {job_id}. Canceling and exiting.")
kill_self()
else:
if not sesh.wait_until_alive(timeout=app_config.sbatch_post_timeout):
logger.error("Could not find a running VNC session for the instance {sesh}")
if sesh.wait_until_alive(timeout=app_config.sbatch_post_timeout):
print_connection_string(session=sesh)
exit(0)
else:
logger.error("VNC session for SLURM job {job_id} doesn't seem to be alive")
kill_self()
print_connection_string(session=sesh)
return sesh
else:
logger.info(f"Could not find instance file at {instance_file} before timeout")
cancel_job(job_id)
logger.info(f"Canceled job {job_id} before timeout")
return None
kill_self()


def cmd_stop(job_id: Optional[int] = None, stop_all: bool = False):
Expand Down Expand Up @@ -238,7 +242,6 @@ def signal_handler(signal_number, frame):
signal.signal(signal.SIGSTOP, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)


assert (job_id is not None) ^ (session is not None), "Must specify either a job id or session"

if job_id:
Expand Down
12 changes: 6 additions & 6 deletions hyakvnc/util.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,16 +2,16 @@
import sys
import time
from pathlib import Path
from typing import Callable, Optional, Union
from typing import Callable, Optional, Union, Any


def repeat_until(
func: Callable,
condition: Callable[[int], bool],
condition: Callable,
timeout: Optional[float] = None,
poll_interval: float = 1.0,
max_iter: Optional[int] = None,
) -> bool:
) -> Union[Any, None]:
begin_time = time.time()
assert timeout is None or timeout > 0, "Timeout must be greater than zero"
assert poll_interval > 0, "Poll interval must be greater than zero"
Expand All @@ -20,13 +20,13 @@ def repeat_until(
while time.time() < begin_time + timeout:
if max_iter:
if i >= max_iter:
return False
return None
res = func()
if condition(res):
return True
return res
time.sleep(poll_interval)
i += 1
return False
return None


def wait_for_file(path: Union[Path, str], timeout: Optional[float] = None, poll_interval: float = 1.0):
Expand Down

0 comments on commit cb6da91

Please sign in to comment.