From 184a55b0d2103d42f455cbeaed2e9aed55b566ef Mon Sep 17 00:00:00 2001 From: Dimi Kot Date: Thu, 29 Feb 2024 17:11:32 -0800 Subject: [PATCH] Add Dockerfile for self-hosted runner boilerplate Pull Request: https://github.com/dimikot/ci-storage/pull/5 (main) --- .github/workflows/ci.yml | 30 +++++++++++++++++++++- docker/Dockerfile | 45 +++++++++++++++++++++++++++++++++ docker/docker-compose.yml | 12 +++++++++ docker/entrypoint.sh | 53 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 docker/Dockerfile create mode 100644 docker/docker-compose.yml create mode 100644 docker/entrypoint.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 83e6846..61c0840 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,8 +7,9 @@ on: push: branches: - main + jobs: - test: + ci-storage-test: runs-on: ubuntu-latest steps: - name: Checkout @@ -32,3 +33,30 @@ jobs: set -e ls -la ~/ci-storage/dimikot/ci-storage [ "$(cat dummy.txt)" = "dummy" ] || { echo "dummy.txt was not restored"; exit 1; } + + self-hosted-runner-boot-docker: + runs-on: ubuntu-latest + timeout-minutes: 3 + permissions: + actions: write + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Build Docker image + run: | + set -e + cd docker + docker-compose build + - name: Start Docker container + run: | + set -e + cd docker + docker-compose up + env: + GH_REPOSITORY: ${{ github.repository }} + GH_LABELS: ci-storage-test + GH_TOKEN: ${{ secrets.CI_PAT }} +# self-hosted-runner-spawn-job: +# runs-on: ["self-hosted", "ci-storage-test"] +# steps: +# - run: echo "Hello, world!" 
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..c94c5d7
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,45 @@
+FROM ubuntu:22.04
+# Version of the actions/runner release to download; passed in as a build arg.
+ARG RUNNER_VERSION
+# Runtime settings read by entrypoint.sh; empty unless overridden at run time.
+ENV GH_REPOSITORY=""
+ENV GH_LABELS=""
+ENV GH_TOKEN=""
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN true \
+  && apt-get update -y \
+  && apt-get upgrade -y \
+  && apt-get install -y --no-install-recommends \
+    awscli jq gh \
+    mc gcc git curl wget pv psmisc unzip vim nano telnet net-tools bash-completion \
+    libssl-dev apt-transport-https build-essential ca-certificates locales pkg-config
+
+RUN true \
+  && useradd -m ubuntu
+# Unpack the runner as the unprivileged user so that this user owns the files.
+USER ubuntu
+RUN true \
+  && mkdir /home/ubuntu/actions-runner \
+  && cd /home/ubuntu/actions-runner \
+  && arch=$(dpkg --print-architecture) \
+  && case "$arch" in \
+    x86_64|amd64) arch=linux-x64 ;; \
+    aarch64|arm64) arch=linux-arm64 ;; \
+    *) echo >&2 "unsupported architecture: $arch"; exit 1 ;; \
+  esac \
+  && curl --no-progress-meter -fL https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-$arch-$RUNNER_VERSION.tar.gz | tar xz
+# Root is needed to install the runner's OS dependencies and to clean up apt.
+USER root
+RUN /home/ubuntu/actions-runner/bin/installdependencies.sh \
+  && apt-get autoremove -y \
+  && apt-get clean \
+  && apt-get autoclean \
+  && rm -rf /var/lib/apt/lists/*
+
+USER ubuntu
+COPY --chmod=755 --chown=ubuntu:ubuntu entrypoint.sh /home/ubuntu
+
+WORKDIR /home/ubuntu
+ENTRYPOINT ["./entrypoint.sh"]
+CMD ["./run.sh"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..98410ee
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,12 @@
+version: "3.4"
+services:
+  ci-storage:
+    build:
+      context: .
+      dockerfile: ./Dockerfile
+      args:
+        - RUNNER_VERSION=2.314.1
+    environment:
+      - GH_REPOSITORY
+      - GH_LABELS
+      - GH_TOKEN
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 0000000..79e8ba3
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# Here we make an opinionated decision to NOT use ephemeral or jit action
+# runners. Reasons:
+# - We WANT to reuse the work directory across job runs, that's the whole point
+#   of ci-storage architecture and its speedup benefits. So once the runner
+#   finishes some job, we do NOT want it to terminate (as it does in ephemeral
+#   or jit mode), we want it to CONTINUE listening for more jobs to run.
+# - GitHub doesn't allow to remove busy runners via API, which is very good for
+#   us: in case the container shuts down externally due to downscaling, we just
+#   enter the graceful retry loop to delete the corresponding runner via API.
+# - One downside happens when a runner container dies unexpectedly (rare). In
+#   this case, regular "offline" long-living runners are auto-removed by GitHub
+#   itself once in 2 weeks, whilst ephemeral (or jit) "offline" runners are
+#   auto-removed in 1 day. But we anyways need to implement some manual removal
+#   cycle externally, since even 1 day is way too much for garbage accumulation.
+#
+set -u -e -o xtrace
+
+: "${GH_REPOSITORY:?must be set to owner/repo}" # {owner}/{repo}
+: "$GH_LABELS"
+[ -n "${GH_TOKEN:+set}" ] || { echo >&2 "GH_TOKEN must be set"; exit 1; } # used by gh cli (value kept out of the trace)
+
+cd ./actions-runner
+
+name="ci-storage-$(hostname)"
+
+token=$(gh api -X POST --jq .token "repos/$GH_REPOSITORY/actions/runners/registration-token")
+./config.sh \
+  --unattended \
+  --url "https://github.com/$GH_REPOSITORY" \
+  --token "$token" \
+  --name "$name" \
+  --labels "$GH_LABELS"
+
+cleanup() {
+  # Retry deleting the runner until it succeeds.
+  # - Busy runner fails in deletion, so we can retry safely until it becomes
+  #   idle; the && chain keeps a failed step from tripping "set -e".
+  # - The external orchestrator will eventually kill the container after a large
+  #   timeout (say, 15 minutes or so) needed for a running job to finish.
+  while :; do
+    token=$(gh api -X POST --jq .token "repos/$GH_REPOSITORY/actions/runners/remove-token") \
+      && ./config.sh remove --token "$token" && break
+    sleep 5
+    : "Retrying deletion till the runner becomes idle and succeeds..."
+  done
+}
+
+trap "cleanup; exit 130" INT
+trap "cleanup; exit 143" TERM
+
+#eval "$@" & wait $!