Skip to content

Commit

Permalink
Add Dockerfile for self-hosted runner boilerplate
Browse files Browse the repository at this point in the history
Pull Request: #5 (main)
  • Loading branch information
dimikot committed Mar 1, 2024
1 parent 82b728a commit 64a3419
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 1 deletion.
39 changes: 38 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ on:
push:
branches:
- main

jobs:
test:
# Tests ci-storage tool itself.
ci-storage-test:
runs-on: ubuntu-latest
steps:
- name: Checkout
Expand All @@ -32,3 +34,38 @@ jobs:
set -e
ls -la ~/ci-storage/dimikot/ci-storage
[ "$(cat dummy.txt)" = "dummy" ] || { echo "dummy.txt was not restored"; exit 1; }
# Builds and boots a self-hosted runner inside GitHub's infra. Once it's
# settled, there is a container with one self-hosted runner running and
# waiting for jobs with the "ci-storage-test" label to pick up (based on the
# docker/Dockerfile image). The "Start Docker container" step stays in the
# foreground until the container exits, so timeout-minutes bounds the whole
# boot-and-wait cycle.
self-hosted-runner-boot-docker:
  runs-on: ubuntu-latest
  timeout-minutes: 5
  permissions:
    actions: write
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    # Uses the Compose V2 CLI plugin ("docker compose"); the standalone v1
    # "docker-compose" binary is not available on current ubuntu-latest
    # runner images.
    - name: Build Docker image
      run: cd docker && docker compose build
    - name: Start Docker container
      run: cd docker && docker compose up
      env:
        # Passed through to the container by docker/docker-compose.yml and
        # consumed by docker/entrypoint.sh.
        GH_REPOSITORY: ${{ github.repository }}
        GH_LABELS: ci-storage-test
        GH_TOKEN: ${{ secrets.CI_PAT }}

# A smoke-test job carrying the ci-storage-test label. It stays queued until
# the self-hosted runner booted by the previous job comes online and picks it
# up. As its last action it sends SIGINT to the PID stored in runner.pid
# (written by the container's entrypoint.sh), so the container (based on the
# docker/Dockerfile image) shuts down gracefully.
self-hosted-runner-spawn-job-test:
  runs-on:
    - self-hosted
    - ci-storage-test
  steps:
    - name: Run Hello World job and then terminate run.sh
      run: |
        set -e -o xtrace
        echo "Hello, world!"
        cd /home/ubuntu/actions-runner
        kill -SIGINT $(cat runner.pid)
45 changes: 45 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Image for a long-living GitHub Actions self-hosted runner. The runner
# distribution is unpacked into /home/ubuntu/actions-runner and started via
# entrypoint.sh, which registers the runner and then runs CMD.
FROM ubuntu:22.04

# Release of actions/runner to download at build time.
ARG RUNNER_VERSION=2.314.1

# Runtime settings read by entrypoint.sh. They default to empty and are
# expected to be supplied when the container is started.
ENV GH_REPOSITORY=""
ENV GH_LABELS=""
ENV GH_TOKEN=""

ENV DEBIAN_FRONTEND=noninteractive
RUN true \
  && apt-get update -y \
  && apt-get upgrade -y \
  && apt-get install -y --no-install-recommends \
    awscli jq gh \
    mc gcc git curl wget pv psmisc unzip vim nano telnet net-tools bash-completion \
    libssl-dev apt-transport-https build-essential ca-certificates locales pkg-config

RUN true \
  && useradd -m ubuntu

# Download and unpack the runner as the unprivileged user so that the files
# are owned by "ubuntu".
USER ubuntu
RUN true \
  && mkdir /home/ubuntu/actions-runner \
  && cd /home/ubuntu/actions-runner \
  && arch=$(dpkg --print-architecture) \
  && case "$arch" in \
    x86_64|amd64) arch=linux-x64 ;; \
    aarch64|arm64) arch=linux-arm64 ;; \
    *) echo >&2 "unsupported architecture: $arch"; exit 1 ;; \
  esac \
  && curl --no-progress-meter -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-$arch-$RUNNER_VERSION.tar.gz | tar xz

# Installing the runner's OS dependencies and cleaning apt state needs root.
# Every command is chained with "&&": without it, "apt-get autoremove" would
# be passed to installdependencies.sh as arguments instead of being executed.
# "-y" keeps autoremove from stopping at a confirmation prompt during the
# non-interactive build.
USER root
RUN true \
  && /home/ubuntu/actions-runner/bin/installdependencies.sh \
  && apt-get autoremove -y \
  && apt-get clean \
  && apt-get autoclean \
  && rm -rf /var/lib/apt/lists/*

USER ubuntu
COPY --chmod=755 --chown=ubuntu:ubuntu entrypoint.sh /home/ubuntu

WORKDIR /home/ubuntu
# entrypoint.sh receives CMD as its arguments and runs it after registering
# the runner.
ENTRYPOINT ["./entrypoint.sh"]
CMD ["./run.sh"]
10 changes: 10 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# Compose project with a single service that builds the image from the
# Dockerfile located next to this file.
version: '3.4'
services:
  ci-storage:
    build:
      dockerfile: ./Dockerfile
      context: .
    # Names listed without a value are taken from the environment in which
    # the compose command itself runs.
    environment:
      - GH_REPOSITORY
      - GH_LABELS
      - GH_TOKEN
55 changes: 55 additions & 0 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash
#
# Here we make an opinionated decision to NOT use ephemeral or jit action
# runners. Reasons:
# - We WANT to reuse the work directory across job runs, that's the whole point
# of ci-storage architecture and its speedup benefits. So once the runner
# finishes some job, we do NOT want it to terminate (as it does in ephemeral
# or jit mode), we want it to CONTINUE listening for more jobs to run.
# - GitHub doesn't allow removing busy runners via API, which is very good for
# us: in case the container shuts down externally due to downscaling, we just
# enter the graceful retry loop to delete the corresponding runner via API.
# - One downside happens when a runner container dies unexpectedly (rare). In
# this case, regular "offline" long-living runners are auto-removed by GitHub
# itself once in 2 weeks, whilst ephemeral (or jit) "offline" runners are
# auto-removed in 1 day. But we anyways need to implement some manual removal
# cycle externally, since even 1 day is way too much for garbage accumulation.
#
# Abort on unset variables and on the first failing command.
set -u -e

# Validate the required settings BEFORE enabling xtrace, so that the check
# itself does not echo the value of GH_TOKEN into the container log. ":?" also
# rejects empty values: the image defaults these variables to "", so a plain
# "set -u" check would never catch a missing setting.
: "${GH_REPOSITORY:?must be set to owner/repo}"
: "$GH_LABELS"
: "${GH_TOKEN:?must be set, it is used by gh cli}"

# Trace the remaining commands. NOTE: the trace still shows the short-lived
# registration token passed to config.sh below.
set -o xtrace

cd ./actions-runner

name="ci-storage-$(hostname)"

# Obtain a registration token for the repository and register this runner
# non-interactively under the given name and labels.
token=$(gh api -X POST --jq .token "repos/$GH_REPOSITORY/actions/runners/registration-token")
./config.sh \
  --unattended \
  --url "https://github.com/$GH_REPOSITORY" \
  --token "$token" \
  --name "$name" \
  --labels "$GH_LABELS"

cleanup() {
  # Retry deleting the runner until it succeeds.
  # - Busy runner fails in deletion, so we can retry safely until it becomes
  #   idle and is successfully deleted.
  # - The external orchestrator will eventually kill the container after a
  #   large timeout (say, 15 minutes or so) needed for a running job to finish.
  #
  # Fetching the remove-token and removing the runner are chained with "&&":
  # the script runs under "set -e", and a failing "gh api" in a standalone
  # assignment would terminate the whole script instead of retrying. Inside an
  # "&&" list a failure only makes the loop fall through to the sleep.
  local remove_token
  while :; do
    remove_token=$(gh api -X POST --jq .token "repos/$GH_REPOSITORY/actions/runners/remove-token") \
      && ./config.sh remove --token "$remove_token" \
      && break
    sleep 5
    : "Retrying deletion till the runner becomes idle and succeeds..."
  done
}

# On SIGINT or SIGTERM, deregister the runner via cleanup and then exit with
# status 130 (INT) or 143 (TERM).
trap "cleanup; exit 130" INT
trap "cleanup; exit 143" TERM

# Store this script's own PID in runner.pid in the current directory
# (./actions-runner after the cd above), so that another process can signal
# this script, e.g. with: kill -SIGINT $(cat runner.pid)
echo $$ > runner.pid

# Run the script's arguments (the image's CMD, ./run.sh by default) in the
# background and block in "wait" on it. Bash runs a trap right away when a
# signal arrives during the "wait" builtin, whereas a trap is deferred until
# a foreground command completes.
eval "$@" & wait $!

0 comments on commit 64a3419

Please sign in to comment.