-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3014 from open-formulieren/feature/2927-celery-pr…
…obes [#2927] enable readiness + liveness check for celery worker
- Loading branch information
Showing
8 changed files
with
188 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#!/usr/bin/env python | ||
# | ||
# Check the health of a Celery worker. | ||
# | ||
# The worker process writes and periodically touches a number of files that indicate it | ||
# is available and still healthy. If the worker becomes unhealthy for any reason, the | ||
# timestamp of when the heartbeat file was last touched will not update and the delta | ||
# becomes too big, allowing (container) orchestration to terminate and restart the | ||
# worker process. | ||
# | ||
# Example usage with Kubernetes, as a liveness probe: | ||
# | ||
# .. code-block:: yaml | ||
# | ||
# livenessProbe: | ||
# exec: | ||
# command: | ||
# - python | ||
# - /app/bin/check_celery_worker_liveness.py | ||
# initialDelaySeconds: 10 | ||
# periodSeconds: 30 # must be smaller than `MAX_WORKER_LIVENESS_DELTA` | ||
# | ||
# Reference: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command | ||
# | ||
# Supported environment variables: | ||
# | ||
# * ``MAX_WORKER_LIVENESS_DELTA``: maximum delta between heartbeats before reporting | ||
# failure, in seconds. Defaults to 60 (one minute). | ||
|
||
|
||
import os | ||
import sys | ||
import time | ||
from pathlib import Path | ||
|
||
# Marker files maintained by the worker process (see the header comment of this
# script): the readiness file signals the worker is available, the heartbeat file is
# touched periodically while the worker is healthy.
HEARTBEAT_FILE = Path(__file__).parent.parent / "tmp" / "celery_worker_heartbeat"
READINESS_FILE = Path(__file__).parent.parent / "tmp" / "celery_worker_ready"
MAX_WORKER_LIVENESS_DELTA = int(os.getenv("MAX_WORKER_LIVENESS_DELTA", 60))  # seconds


def check_worker_health(
    readiness_file: Path = READINESS_FILE,
    heartbeat_file: Path = HEARTBEAT_FILE,
    max_delta: int = MAX_WORKER_LIVENESS_DELTA,
) -> tuple:
    """
    Evaluate the readiness and liveness marker files of the Celery worker.

    :param readiness_file: path that must exist as a regular file for the worker to
      be considered ready.
    :param heartbeat_file: path whose modification time is compared with the current
      time.
    :param max_delta: maximum allowed age of the heartbeat file, in seconds.
    :return: a ``(healthy, message)`` tuple; ``healthy`` is ``True`` only when both
      files exist and the heartbeat is not older than ``max_delta`` seconds.
    """
    # check if worker is ready
    if not readiness_file.is_file():
        return False, "Celery worker not ready."

    # check if worker is live
    if not heartbeat_file.is_file():
        return False, "Celery worker heartbeat not found."

    # check if worker heartbeat satisfies constraint
    time_diff = time.time() - heartbeat_file.stat().st_mtime
    if time_diff > max_delta:
        # Report the threshold that was actually applied rather than a fixed "60s",
        # since the limit is configurable through the environment.
        return (
            False,
            f"Celery worker heartbeat: interval exceeds constraint ({max_delta}s).",
        )

    return True, "Celery worker heartbeat found: OK."


def main() -> int:
    """Print the health verdict and return the process exit code (0 = healthy)."""
    healthy, message = check_worker_health()
    print(message)
    return 0 if healthy else 1


if __name__ == "__main__":
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import os | ||
import subprocess | ||
import time | ||
from pathlib import Path | ||
|
||
from django.conf import settings | ||
from django.test import TestCase | ||
|
||
from openforms.celery import READINESS_FILE, app | ||
|
||
START_WORKER_SCRIPT = Path(settings.BASE_DIR) / "bin" / "celery_worker.sh"


class CeleryTest(TestCase):
    """Check the life cycle of READINESS_FILE around a real Celery worker process."""

    def setUp(self):
        super().setUp()

        def shutdown_celery():
            # Ask any running worker to shut down and make sure no stale readiness
            # file leaks into other tests.
            app.control.shutdown()
            READINESS_FILE.unlink(missing_ok=True)

        self.addCleanup(shutdown_celery)

    @staticmethod
    def _wait_until(predicate, timeout=60, interval=1):
        """
        Poll ``predicate`` until it returns a truthy value or ``timeout`` expires.

        :param predicate: zero-argument callable evaluated on every iteration.
        :param timeout: maximum time to keep polling, in seconds.
        :param interval: pause between two evaluations, in seconds.
        :return: ``True`` as soon as the predicate holds, ``False`` on timeout.
        """
        # monotonic clock: the deadline is not affected by system clock adjustments
        deadline = time.monotonic() + timeout
        while time.monotonic() <= deadline:
            if predicate():
                return True
            # wait a bit longer...
            time.sleep(interval)
        return False

    def test_celery_worker_health_check(self):
        """
        Assert that READINESS_FILE exists after the worker has started, but not
        before the start and not after the shutdown.
        """
        self.assertFalse(
            READINESS_FILE.is_file(),
            "Celery worker not started but READINESS_FILE found",
        )

        # start Celery worker
        process = subprocess.Popen(
            [START_WORKER_SCRIPT],
            cwd=settings.BASE_DIR,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            env={**os.environ, "ENABLE_COVERAGE": os.environ.get("COVERAGE_RUN", "")},
        )

        def stop_worker():
            # Safety net: if the test fails before (or during) the regular shutdown
            # below, do not leave the child process running after the test.
            if process.poll() is None:
                process.kill()
                process.wait()

        self.addCleanup(stop_worker)

        # wait for READINESS_FILE to be created, break out as soon as possible
        if not self._wait_until(READINESS_FILE.is_file):
            self.fail("READINESS_FILE was not created within 60 seconds")

        # stop the worker process
        process.terminate()  # sends SIGTERM, (warm) shutting down the worker.
        process.wait(timeout=60)  # wait for process to terminate

        # now assert that the READINESS FILE was deleted as part of the shutdown
        # procedure
        if not self._wait_until(lambda: not READINESS_FILE.is_file()):
            self.fail("READINESS_FILE was not cleaned up within 60 seconds")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# This folder contains temporary files generated by the project, | ||
# e.g. files for testing the readiness/liveness of celery workers |