Skip to content

Commit

Permalink
Merge pull request #516 from opensafely-core/stats-shorter-timeout
Browse files Browse the repository at this point in the history
fix: shorter timeout on getting stats
  • Loading branch information
bloodearnest authored Nov 16, 2022
2 parents 59afa3e + 04a2776 commit 870c804
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 11 deletions.
9 changes: 7 additions & 2 deletions jobrunner/lib/docker_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,28 @@
from jobrunner.lib.subprocess_utils import subprocess_run


def get_volume_and_container_sizes():
DEFAULT_TIMEOUT = 10


def get_volume_and_container_sizes(timeout=DEFAULT_TIMEOUT):
response = subprocess_run(
["docker", "system", "df", "--verbose", "--format", "{{json .}}"],
capture_output=True,
check=True,
timeout=timeout,
)
data = json.loads(response.stdout)
volumes = {row["Name"]: _parse_size(row["Size"]) for row in data["Volumes"]}
containers = {row["Names"]: _parse_size(row["Size"]) for row in data["Containers"]}
return volumes, containers


def get_container_stats():
def get_container_stats(timeout=DEFAULT_TIMEOUT):
response = subprocess_run(
["docker", "stats", "--no-stream", "--format", "{{json .}}"],
capture_output=True,
check=True,
timeout=timeout,
)
data = [json.loads(line) for line in response.stdout.splitlines()]
return {
Expand Down
22 changes: 13 additions & 9 deletions jobrunner/record_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import logging
import sqlite3
import subprocess
import sys
import time

Expand Down Expand Up @@ -56,15 +57,18 @@ def get_database_connection(filename):


def log_stats(connection):
stats = get_all_stats()
# If no containers are running then don't log anything
if not stats["containers"]:
return
timestamp = datetime.datetime.utcnow().isoformat()
connection.execute(
"INSERT INTO stats (timestamp, data) VALUES (?, ?)",
[timestamp, json.dumps(stats)],
)
try:
stats = get_all_stats()
# If no containers are running then don't log anything
if not stats["containers"]:
return
timestamp = datetime.datetime.utcnow().isoformat()
connection.execute(
"INSERT INTO stats (timestamp, data) VALUES (?, ?)",
[timestamp, json.dumps(stats)],
)
except subprocess.TimeoutExpired:
log.exception("Getting docker stats timed out")


def get_all_stats():
Expand Down
16 changes: 16 additions & 0 deletions tests/test_record_stats.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import logging
import subprocess

from jobrunner import record_stats
from jobrunner.models import State, StatusCode
from tests.conftest import get_trace
Expand Down Expand Up @@ -40,3 +43,16 @@ def test_record_tick_trace(db, freezer):
assert span.parent.span_id == root.context.span_id

assert "SUCCEEDED" not in [s.name for s in spans]


def test_log_stats(db, caplog, monkeypatch):
def error():
raise subprocess.TimeoutExpired("test me", 10)

caplog.set_level(logging.INFO)
monkeypatch.setattr(record_stats, "get_all_stats", error)

record_stats.log_stats(None)

assert caplog.records[-1].msg == "Getting docker stats timed out"
assert "test me" in caplog.records[-1].exc_text

0 comments on commit 870c804

Please sign in to comment.