Skip to content

Commit

Permalink
map all slurm states
Browse files Browse the repository at this point in the history
  • Loading branch information
gpetretto committed Feb 21, 2024
1 parent c6a2578 commit 0251840
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
14 changes: 14 additions & 0 deletions src/qtoolkit/core/data_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,20 @@ class QState(QTKEnum):
queue manager (e.g. PBS, SLURM, ...) needs to be
defined.
UNDETERMINED: The job status cannot be determined. This is a permanent
issue that cannot be resolved by querying the job state again.
QUEUED: The job is queued to be scheduled and executed.
QUEUED_HELD: The job has been placed on hold by the system, the
administrator, or the submitting user.
RUNNING: The job is running on an execution host.
SUSPENDED: The job has been suspended by the user, the system or the
administrator.
REQUEUED: The job was re-queued by the DRM system, and is eligible to run.
REQUEUED_HELD: The job was re-queued by the DRM system, and is currently
placed on hold by the system, the administrator, or the submitting user.
DONE: The job finished without an error.
FAILED: The job exited abnormally before finishing.
Note that not all these standardized states are available in the
actual queue manager implementations.
"""
Expand Down
24 changes: 23 additions & 1 deletion src/qtoolkit/io/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@


class SlurmState(QSubState):
BOOT_FAIL = "BOOT_FAIL", "BF"
CANCELLED = "CANCELLED", "CA"
COMPLETING = "COMPLETING", "CG"
COMPLETED = "COMPLETED", "CD"
Expand All @@ -97,7 +98,17 @@ class SlurmState(QSubState):
NODE_FAIL = "NODE_FAIL", "NF"
OUT_OF_MEMORY = "OUT_OF_MEMORY", "OOM"
PENDING = "PENDING", "PD"
PREEMPTED = "PREEMPTED", "PR"
RESV_DEL_HOLD = "RESV_DEL_HOLD", "RD"
REQUEUE_FED = "REQUEUE_FED", "RF"
REQUEUE_HOLD = "REQUEUE_HOLD", "RH"
RESIZING = "RESIZING", "RS"
REVOKED = "REVOKED", "RV"
RUNNING = "RUNNING", "R"
SIGNALING = "SIGNALING", "SI"
SPECIAL_EXIT = "SPECIAL_EXIT", "SE"
STAGE_OUT = "STAGE_OUT", "SO"
STOPPED = "STOPPED", "ST"
SUSPENDED = "SUSPENDED", "S"
TIMEOUT = "TIMEOUT", "TO"

Expand All @@ -108,7 +119,8 @@ def qstate(self) -> QState:


_STATUS_MAPPING = {
SlurmState.CANCELLED: QState.SUSPENDED, # Should this be failed ?
SlurmState.BOOT_FAIL: QState.FAILED,
SlurmState.CANCELLED: QState.FAILED,
SlurmState.COMPLETING: QState.RUNNING,
SlurmState.COMPLETED: QState.DONE,
SlurmState.CONFIGURING: QState.QUEUED,
Expand All @@ -117,7 +129,17 @@ def qstate(self) -> QState:
SlurmState.NODE_FAIL: QState.FAILED,
SlurmState.OUT_OF_MEMORY: QState.FAILED,
SlurmState.PENDING: QState.QUEUED,
SlurmState.PREEMPTED: QState.FAILED,
SlurmState.RESV_DEL_HOLD: QState.QUEUED_HELD,
SlurmState.REQUEUE_FED: QState.QUEUED,
SlurmState.REQUEUE_HOLD: QState.QUEUED,
SlurmState.RESIZING: QState.RUNNING,
SlurmState.REVOKED: QState.FAILED,
SlurmState.RUNNING: QState.RUNNING,
SlurmState.SIGNALING: QState.RUNNING,
SlurmState.SPECIAL_EXIT: QState.FAILED,
SlurmState.STAGE_OUT: QState.RUNNING,
SlurmState.STOPPED: QState.RUNNING,
SlurmState.SUSPENDED: QState.SUSPENDED,
SlurmState.TIMEOUT: QState.FAILED,
}
Expand Down

0 comments on commit 0251840

Please sign in to comment.