diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 83e6846..e075f26 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,7 +8,7 @@ on:
     branches:
       - main
 jobs:
-  test:
+  ci-storage-test:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -32,3 +32,21 @@ jobs:
           set -e
           ls -la ~/ci-storage/dimikot/ci-storage
           [ "$(cat dummy.txt)" = "dummy" ] || { echo "dummy.txt was not restored"; exit 1; }
+
+  # Builds the self-hosted runner image and starts its container.
+  docker-boot-self-hosted-runner:
+    runs-on: ubuntu-latest
+    timeout-minutes: 3
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Build Docker image
+        run: |
+          set -e
+          cd docker
+          docker-compose build
+      - name: Start Docker container
+        run: |
+          set -e
+          cd docker
+          GH_REPOSITORY=${{ github.repository }} GH_LABELS=ci-storage docker-compose up
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..c94c5d7
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,52 @@
+FROM ubuntu:22.04
+
+# Version of the GitHub Actions runner to download, passed as a build arg.
+ARG RUNNER_VERSION
+
+# Settings read by entrypoint.sh at container start; the empty defaults are
+# placeholders to be overridden through the container environment.
+ENV GH_REPOSITORY=""
+ENV GH_LABELS=""
+ENV GH_TOKEN=""
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN true \
+  && apt-get update -y \
+  && apt-get upgrade -y \
+  && apt-get install -y --no-install-recommends \
+    awscli jq gh \
+    mc gcc git curl wget pv psmisc unzip vim nano telnet net-tools bash-completion \
+    libssl-dev apt-transport-https build-essential ca-certificates locales pkg-config
+
+RUN true \
+  && useradd -m ubuntu
+
+# Download and unpack the runner release matching the image architecture.
+USER ubuntu
+RUN true \
+  && mkdir /home/ubuntu/actions-runner \
+  && cd /home/ubuntu/actions-runner \
+  && arch=$(dpkg --print-architecture) \
+  && case "$arch" in \
+    x86_64|amd64) arch=linux-x64 ;; \
+    aarch64|arm64) arch=linux-arm64 ;; \
+    *) echo >&2 "unsupported architecture: $arch"; exit 1 ;; \
+  esac \
+  && curl --no-progress-meter -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-$arch-$RUNNER_VERSION.tar.gz | tar xz
+
+# Install the runner's OS dependencies as root, then clean up apt leftovers.
+# Every command is chained with && so that a failing step fails the build;
+# autoremove needs -y because the build has no terminal to confirm on.
+USER root
+RUN /home/ubuntu/actions-runner/bin/installdependencies.sh \
+  && apt-get autoremove -y \
+  && apt-get clean \
+  && apt-get autoclean \
+  && rm -rf /var/lib/apt/lists/*
+
+USER ubuntu
+COPY --chmod=755 --chown=ubuntu:ubuntu entrypoint.sh /home/ubuntu
+
+WORKDIR /home/ubuntu
+ENTRYPOINT ["./entrypoint.sh"]
+CMD ["./run.sh"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..98410ee
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,13 @@
+version: "3.4"
+services:
+  ci-storage:
+    build:
+      context: .
+      dockerfile: ./Dockerfile
+      args:
+        - RUNNER_VERSION=2.314.1
+    # Names without a value are passed through from the caller's environment.
+    environment:
+      - GH_REPOSITORY
+      - GH_LABELS
+      - GH_TOKEN
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 0000000..79e8ba3
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# Here we make an opinionated decision to NOT use ephemeral or jit action
+# runners. Reasons:
+# - We WANT to reuse the work directory across job runs, that's the whole point
+#   of ci-storage architecture and its speedup benefits. So once the runner
+#   finishes some job, we do NOT want it to terminate (as it does in ephemeral
+#   or jit mode), we want it to CONTINUE listening for more jobs to run.
+# - GitHub doesn't allow to remove busy runners via API, which is very good for
+#   us: in case the container shuts down externally due to downscaling, we just
+#   enter the graceful retry loop to delete the corresponding runner via API.
+# - One downside happens when a runner container dies unexpectedly (rare). In
+#   this case, regular "offline" long-living runners are auto-removed by GitHub
+#   itself once in 2 weeks, whilst ephemeral (or jit) "offline" runners are
+#   auto-removed in 1 day. But we anyways need to implement some manual removal
+#   cycle externally, since even 1 day is way too much for garbage accumulation.
+#
+set -u -e
+# GH_REPOSITORY and GH_TOKEN must be non-empty; GH_LABELS only has to be set.
+: "${GH_REPOSITORY:?must be set to owner/repo}"
+: "${GH_LABELS?must be set}"
+: "${GH_TOKEN:?must be set (used by gh cli)}"
+set -o xtrace # enabled only after the checks, so GH_TOKEN is not traced by them
+
+cd ./actions-runner
+name="ci-storage-$(hostname)"
+
+token=$(gh api -X POST --jq .token "repos/$GH_REPOSITORY/actions/runners/registration-token")
+./config.sh \
+  --unattended \
+  --url "https://github.com/$GH_REPOSITORY" \
+  --token "$token" \
+  --name "$name" \
+  --labels "$GH_LABELS"
+
+cleanup() {
+  # Retry deleting the runner until it succeeds.
+  # - Busy runner fails in deletion, so we can retry safely until it becomes
+  #   idle and is successfully deleted.
+  # - The external orchestrator will eventually kill the container after a large
+  #   timeout (say, 15 minutes or so) needed for a running job to finish.
+  while :; do
+    token=$(gh api -X POST --jq .token "repos/$GH_REPOSITORY/actions/runners/remove-token")
+    ./config.sh remove --token "$token" && break
+    sleep 5
+    : "Retrying deletion till the runner becomes idle and succeeds..."
+  done
+}
+
+trap "cleanup; exit 130" INT
+trap "cleanup; exit 143" TERM
+
+#eval "$@" & wait $! # NOTE(review): disabled, so CMD never runs - confirm intent