Skip to content

Commit

Permalink
Add basic retry loop
Browse files Browse the repository at this point in the history
  • Loading branch information
xjules committed Dec 11, 2023
1 parent 650c5ca commit d7b24fc
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 38 deletions.
66 changes: 36 additions & 30 deletions src/ert/scheduler/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,36 +66,42 @@ async def __call__(
self, start: asyncio.Event, sem: asyncio.BoundedSemaphore
) -> None:
await start.wait()
await sem.acquire()

try:
await self._send(State.SUBMITTING)
await self.driver.submit(
self.real.iens, self.real.job_script, cwd=self.real.run_arg.runpath
)

await self._send(State.STARTING)
await self.started.wait()

await self._send(State.RUNNING)
returncode = await self.returncode
if (
returncode == 0
and forward_model_ok(self.real.run_arg).status
== LoadStatus.LOAD_SUCCESSFUL
):
await self._send(State.COMPLETED)
else:
await self._send(State.FAILED)

except asyncio.CancelledError:
await self._send(State.ABORTING)
await self.driver.kill(self.iens)

await self.aborted.wait()
await self._send(State.ABORTED)
finally:
sem.release()
retries = 0
retry: bool = True
while retry:
await sem.acquire()
try:
await self._send(State.SUBMITTING)
await self.driver.submit(
self.real.iens, self.real.job_script, cwd=self.real.run_arg.runpath
)

await self._send(State.STARTING)
await self.started.wait()

await self._send(State.RUNNING)
returncode = await self.returncode
if (
returncode == 0
and forward_model_ok(self.real.run_arg).status
== LoadStatus.LOAD_SUCCESSFUL
):
await self._send(State.COMPLETED)
retry = False
else:
await self._send(State.FAILED)
retries += 1
retry = retries < self._scheduler._max_submit

except asyncio.CancelledError:
retry = False
await self._send(State.ABORTING)
await self.driver.kill(self.iens)

await self.aborted.wait()
await self._send(State.ABORTED)
finally:
sem.release()

async def _send(self, state: State) -> None:
status = STATE_TO_LEGACY[state]
Expand Down
10 changes: 2 additions & 8 deletions src/ert/scheduler/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,7 @@
import threading
from asyncio.queues import Queue
from dataclasses import asdict
from typing import (
TYPE_CHECKING,
Any,
Callable,
Iterable,
MutableMapping,
Optional,
)
from typing import TYPE_CHECKING, Any, Callable, Iterable, MutableMapping, Optional

from pydantic.dataclasses import dataclass
from websockets import Headers
Expand Down Expand Up @@ -50,6 +43,7 @@ def __init__(self, driver: Optional[Driver] = None) -> None:
self._jobs: MutableMapping[int, Job] = {}
self._tasks: MutableMapping[int, asyncio.Task[None]] = {}
self._events: Queue[Any] = Queue()
self._max_submit: int = 2

self._ee_uri = ""
self._ens_id = ""
Expand Down

0 comments on commit d7b24fc

Please sign in to comment.