Skip to content

Commit

Permalink
runner scalability adjustments from applied testing
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Nov 25, 2019
1 parent ad90d50 commit 24cbcbd
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 10 deletions.
40 changes: 32 additions & 8 deletions WDL/runtime/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import traceback
import glob
import time
import random
import multiprocessing
import threading
import shutil
Expand Down Expand Up @@ -317,11 +318,15 @@ def _run(
# stream stderr into log
with PygtailLogger(logger, os.path.join(self.host_dir, "stderr.txt")) as poll_stderr:
# poll for container exit
running = False
while exit_code is None:
time.sleep(1)
time.sleep(random.uniform(1.0, 2.0)) # spread out work over the GIL
if terminating():
raise Terminated() from None
if "running" in self._observed_states:
if not running:
logger.notice("container running") # pyre-fixme
running = True
poll_stderr()
exit_code = self.poll_service(logger, svc)
logger.debug(
Expand Down Expand Up @@ -478,14 +483,33 @@ def chown(self, logger: logging.Logger, client: docker.DockerClient) -> None:
chown -RP {os.geteuid()}:{os.getegid()} {shlex.quote(os.path.join(self.container_dir, 'work'))}
""".strip()
volumes = {self.host_dir: {"bind": self.container_dir, "mode": "rw"}}
logger.debug(_("post-task chown", script=script, volumes=volumes))
try:
logger.debug(_("post-task chown", script=script, volumes=volumes))
client.containers.run(
"alpine:3",
command=["/bin/ash", "-c", script],
volumes=volumes,
auto_remove=True,
)
chowner = None
try:
chowner = client.containers.run(
"alpine:3",
command=["/bin/ash", "-c", script],
volumes=volumes,
detach=True,
)
chowner_status = None
while chowner_status is None:
try:
chowner_status = chowner.wait()
except Exception as exn:
s_exn = str(exn)
if "timed out" not in s_exn and "Timeout" not in s_exn:
raise
logger.warning("post-task chown is taking a long time")

assert (
isinstance(chowner_status, dict)
and chowner_status.get("StatusCode", -1) == 0
), str(chowner_status)
finally:
if chowner:
chowner.remove()
except:
logger.exception("post-task chown failed")

Expand Down
4 changes: 2 additions & 2 deletions tests/test_4taskrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ def test_logging_std_err(self, capture):
for record in std_error_msgs:
line_written = int(record.msg.split('=')[1])
self.assertGreater(record.created, line_written)
# check line logged within 3 seconds of being written
self.assertGreater(line_written+3, record.created)
# check line logged within 4 seconds of being written
self.assertGreater(line_written+4, record.created)

@log_capture()
def test_logging_std_err_captures_full_line(self, capture):
Expand Down

0 comments on commit 24cbcbd

Please sign in to comment.