Skip to content

Commit

Permalink
Add DEBUG_SHUTDOWN_DELAY_SEC env variable
Browse files Browse the repository at this point in the history
  • Loading branch information
dimikot committed Mar 11, 2024
1 parent 962dd5b commit ab51442
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ jobs:
# register a GitHub self-hosted runner and remain waiting for jobs.
docker compose up self-hosted-runner
env:
GH_TOKEN: ${{ secrets.CI_PAT }}
GH_REPOSITORY: ${{ github.repository }}
GH_LABELS: ${{ format('ci-storage-test-{0}-{1}', github.run_id, github.run_attempt) }}
GH_TOKEN: ${{ secrets.CI_PAT }}

# Test the job with "ci-storage-test" tag which is initially queued, but then
# is picked up by the self-hosted-runner container booted in the previous job.
Expand Down
4 changes: 2 additions & 2 deletions docker/compose-up.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ set -e

# Tell the developer what this script is about to do.
echo "Building & booting containers on the local laptop for debugging purposes..."

GH_REPOSITORY=$(gh repo view --json owner,name -q '.owner.login + "/" + .name') \
GH_TOKEN=$(gh auth token) \
GH_TOKEN=$(gh auth token) \
GH_REPOSITORY=$(gh repo view --json owner,name -q '.owner.login + "/" + .name') \
docker compose up --build "$@"
7 changes: 4 additions & 3 deletions docker/compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ services:
host:
condition: service_healthy
environment:
- GH_TOKEN
- GH_REPOSITORY=${GH_REPOSITORY:-dimikot/ci-storage}
- GH_LABELS=${GH_LABELS:-ci-storage}
- GH_TOKEN
- FORWARD_HOST=
- FORWARD_PORTS=22/tcp
- FORWARD_HOST=${FORWARD_HOST:-}
# Take the value from the host variable of the same name; default to SSH only.
- FORWARD_PORTS=${FORWARD_PORTS:-22/tcp}
- CI_STORAGE_HOST=${CI_STORAGE_HOST:-host:22}
- DEBUG_SHUTDOWN_DELAY_SEC=${DEBUG_SHUTDOWN_DELAY_SEC:-1}
secrets:
- CI_STORAGE_PRIVATE_KEY
secrets:
Expand Down
3 changes: 2 additions & 1 deletion docker/self-hosted-runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ FROM $BASE_IMAGE

ARG RUNNER_VERSION="2.314.1"

ENV GH_TOKEN=""
ENV GH_REPOSITORY=""
ENV GH_LABELS=""
ENV GH_TOKEN=""
ENV FORWARD_HOST=""
ENV FORWARD_PORTS=""
ENV CI_STORAGE_HOST=""
ENV DEBUG_SHUTDOWN_DELAY_SEC=""
# SECRET: CI_STORAGE_PRIVATE_KEY

ENV DEBIAN_FRONTEND=noninteractive
Expand Down
9 changes: 6 additions & 3 deletions docker/self-hosted-runner/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@ self-hosted runners as you want. An example scenario:
2. Run an AWS cluster (with e.g. spot instances with manual docker container
boot) and use the image you just published. Configure its environment
variables and secrets accordingly:
- `GH_REPOSITORY`: the repository this runner will serve.
- `GH_LABELS`: labels added to this runner
- `GH_TOKEN`: PAT used to register the runner at github.com
- `GH_REPOSITORY`: the repository this runner will serve
- `GH_LABELS`: labels added to this runner
- `FORWARD_HOST`: some ports at localhost will be rinetd-forwarded to this host (optional)
- `FORWARD_PORTS`: the list of forwarded ports (optional)
- `CI_STORAGE_HOST`: the host which the initial ci-storage run will pull the
data from (optional)
- Pass secret `CI_STORAGE_PRIVATE_KEY`: SSH private key needed to access
- `DEBUG_SHUTDOWN_DELAY_SEC`: a debug feature for testing how much time the
orchestrator gives the container to shut down gracefully before killing it
(optional)
- pass the secret `CI_STORAGE_PRIVATE_KEY`: SSH private key needed to access
CI_STORAGE_HOST without a password.
3. Set up auto-scaling rules based on e.g. the containers' CPU usage. The
running containers are safe to shut down at any time if it's done gracefully
Expand Down
15 changes: 10 additions & 5 deletions docker/self-hosted-runner/root/entrypoint.00-validate.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/bin/bash
set -u -e

# GH_TOKEN is mandatory. Assign an empty default first, so that reading the
# variable below does not trip `set -u` when it was never passed at all.
: "${GH_TOKEN:=}"
if [[ -z "$GH_TOKEN" ]]; then
  echo "GH_TOKEN must be set."
  exit 1
fi

if [[ "${GH_REPOSITORY:=}" != */* ]]; then
echo "GH_REPOSITORY must be set, and the format should be {owner}/{repo}.";
exit 1;
Expand All @@ -11,11 +16,6 @@ if [[ "${GH_LABELS:=}" == "" ]]; then
exit 1;
fi

if [[ "${GH_TOKEN:=}" == "" ]]; then
echo "GH_TOKEN must be set.";
exit 1;
fi

if [[ "${FORWARD_HOST:=}" != "" && ! "$FORWARD_HOST" =~ ^[-.[:alnum:]]+(:[0-9]+)?$ ]]; then
echo "If FORWARD_HOST is passed, it must be a hostname.";
exit 1;
Expand All @@ -35,3 +35,8 @@ if [[ "${CI_STORAGE_HOST:=}" != "" && ! -f /run/secrets/CI_STORAGE_PRIVATE_KEY ]
echo "You must pass secret CI_STORAGE_PRIVATE_KEY when using CI_STORAGE_HOST."
exit 1
fi

# DEBUG_SHUTDOWN_DELAY_SEC is optional. Default it to an empty string (keeps
# `set -u` happy), and when a value is given, insist on decimal digits only.
: "${DEBUG_SHUTDOWN_DELAY_SEC:=}"
if [[ -n "$DEBUG_SHUTDOWN_DELAY_SEC" ]] && ! [[ "$DEBUG_SHUTDOWN_DELAY_SEC" =~ ^[0-9]+$ ]]; then
  echo "If DEBUG_SHUTDOWN_DELAY_SEC is passed, it must be a number."
  exit 1
fi
18 changes: 16 additions & 2 deletions docker/self-hosted-runner/user/entrypoint.05-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,32 @@ cd ~/actions-runner && ./config.sh \
--labels "$GH_LABELS"

#######################################
# Graceful-shutdown handler: optionally delays, then unregisters the runner
# from GitHub, retrying until the removal succeeds.
# NOTE(review): presumably installed via `trap` elsewhere in this script; the
# registration is not visible here.
# Globals:
#   DEBUG_SHUTDOWN_DELAY_SEC (read, optional) - artificial delay in seconds
#   GH_REPOSITORY (read) - "{owner}/{repo}" the runner is registered at
# Outputs: progress messages to stdout
# Returns: 0 once the runner has been removed; loops until then
#######################################
cleanup() {
  # Tolerate an unset variable, so the handler does not die under `set -u`.
  local delay_sec=${DEBUG_SHUTDOWN_DELAY_SEC:-}
  local elapsed=0
  local token

  echo "Received graceful shutdown signal..."

  # A debug facility for testing how much time the orchestrator gives the
  # container to shut down gracefully before killing it.
  if [[ "$delay_sec" != "" ]]; then
    echo "Artificially delaying shutdown for $delay_sec second(s)..."
    while [[ $elapsed -lt "$delay_sec" ]]; do
      sleep 1
      elapsed=$((elapsed + 1))
      echo " ...$elapsed seconds elapsed"
    done
  fi

  # Retry deleting the runner until it succeeds.
  # - Deleting a busy runner fails, so we can retry safely until it becomes
  #   idle and is successfully deleted.
  # - In case we still can't delete the runner for a long time, the external
  #   orchestrator will eventually kill the container after a large timeout
  #   (say, 15 minutes or so) needed for a running job to finish.
  # - A failed remove-token request is treated the same way as a failed
  #   removal: keeping every step inside the `if` condition means that no
  #   single failure aborts the handler, it just triggers another attempt.
  echo "Removing the runner..."
  while :; do
    if token=$(gh api -X POST --jq .token "repos/$GH_REPOSITORY/actions/runners/remove-token") \
      && cd ~/actions-runner \
      && ./config.sh remove --token "$token"; then
      break
    fi
    sleep 5
    echo "Retrying till the runner becomes idle and the removal succeeds..."
  done
}

Expand Down

0 comments on commit ab51442

Please sign in to comment.