From 961cf36264ae670f66623367692854105a9658da Mon Sep 17 00:00:00 2001
From: Dimi Kot
Date: Fri, 1 Mar 2024 04:33:34 -0800
Subject: [PATCH] Add ci-storage-host container, add ci-storage call when self-hosted-runner initializes

Pull Request: https://github.com/dimikot/ci-storage/pull/6 (main)
---
Review notes (free text here, before the first "diff --git", is not part of
the commit message or of the diff):
* This copy of the patch had its line breaks and indentation collapsed. They
  were restored so that every hunk matches its "@@" line counts; the position
  of blank context lines and the exact indentation are inferred - TODO confirm
  against the original commit before applying.
* The author e-mail is absent from the "From:" header in this copy.
* Payload changes relative to the original commit (line counts unchanged, but
  the blob hashes on the affected "index" lines no longer match):
  - docker-compose.yml: port mapping quoted ("10022:22") so that YAML 1.1
    loaders cannot read it as a base-60 integer.
  - ci-storage-host/entrypoint.sh: obsolescent "[ ... -a ... ]" replaced by
    two tests joined with "&&"; the mkdir path is quoted.
  - self-hosted-runner/entrypoint.sh: "$local_dir" quoted in mkdir.
  - self-hosted-runner/README.md: "at anytime" corrected to "at any time".

 .github/workflows/ci.yml                      | 32 ++++++++++++-------
 docker/ci-storage-host/Dockerfile             | 26 +++++++++++++++
 docker/ci-storage-host/entrypoint.sh          | 29 +++++++++++++++++
 docker/docker-compose.yml                     | 19 +++++++++--
 docker/{ => self-hosted-runner}/Dockerfile    | 12 ++++---
 docker/self-hosted-runner/README.md           | 22 +++++++++++++
 docker/{ => self-hosted-runner}/entrypoint.sh | 16 ++++++++++
 7 files changed, 137 insertions(+), 19 deletions(-)
 create mode 100644 docker/ci-storage-host/Dockerfile
 create mode 100644 docker/ci-storage-host/entrypoint.sh
 rename docker/{ => self-hosted-runner}/Dockerfile (84%)
 create mode 100644 docker/self-hosted-runner/README.md
 rename docker/{ => self-hosted-runner}/entrypoint.sh (80%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7ce8922..f5e48d9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -36,9 +36,8 @@ jobs:
           [ "$(cat dummy.txt)" = "dummy" ] || { echo "dummy.txt was not restored"; exit 1; }
 
   # Builds and boots a self-hosted runner inside GitHub's infra. Once it's
-  # settled, there is a container with one self-hosted runner running and
-  # waiting for jobs with "ci-storage-test" tag to pick up (based on
-  # docker/Dockerfile image).
+  # settled, there is a running container with one self-hosted runner waiting
+  # for jobs with "ci-storage-test" tag to pick up (based on Dockerfile image).
   self-hosted-runner-build-and-boot-docker-container:
     runs-on: ubuntu-latest
     timeout-minutes: 5
@@ -47,10 +46,14 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-      - name: Build self-hosted runner Docker image
-        run: cd docker && docker-compose build
-      - name: Start self-hosted runner in a Docker container
-        run: cd docker && docker-compose up
+      - name: Start test Docker containers
+        run: |
+          ssh-keygen -t ed25519 -qf /tmp/key -N ""
+          export CI_STORAGE_HOST_SSH_KEY="$(cat /tmp/key)"
+          set -o xtrace
+          cd docker
+          docker compose up ci-storage-host -d --build
+          docker compose up self-hosted-runner --build
         env:
           GH_REPOSITORY: ${{ github.repository }}
           GH_LABELS: ci-storage-test
@@ -59,13 +62,20 @@ jobs:
   # The test job with ci-storage-test tag which is initially queued, but then is
   # picked up by the self-hosted runner booted in the previous job. In the end,
   # the test job sends SIGINT to the container entrypoint.sh PID, so the
-  # container (based on docker/Dockerfile image) shuts down gracefully.
+  # container (based on Dockerfile image) shuts down gracefully.
   self-hosted-runner-spawn-job-test:
     runs-on: ["self-hosted", "ci-storage-test"]
     steps:
-      - name: Run test job inside of the self-hosted runner and terminate run.sh
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Run test job inside the self-hosted runner
+        run: echo "Hello, world!"
+      - name: Test store
+        uses: ./
+        with:
+          action: "store"
+          storage-host: "ci-storage-host"
+      - name: Kill self-hosted runner container
         run: |
-          set -e -o xtrace
-          echo "Hello, world!"
           cd /home/ubuntu/actions-runner
           kill -SIGINT $(cat runner.pid)
diff --git a/docker/ci-storage-host/Dockerfile b/docker/ci-storage-host/Dockerfile
new file mode 100644
index 0000000..e57c28f
--- /dev/null
+++ b/docker/ci-storage-host/Dockerfile
@@ -0,0 +1,26 @@
+ARG BASE_IMAGE="ubuntu:22.04"
+
+FROM $BASE_IMAGE
+
+ENV GH_REPOSITORY=""
+ENV CI_STORAGE_HOST_SSH_KEY=""
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN true \
+  && apt-get update -y \
+  && apt-get upgrade -y \
+  && apt-get install -y --no-install-recommends \
+    awscli rsync openssh-server \
+    mc gcc git curl wget pv psmisc unzip vim nano telnet net-tools bash-completion \
+    libssl-dev apt-transport-https build-essential ca-certificates locales pkg-config \
+  && sed -i -e "s|#PermitRootLogin.*|PermitRootLogin no|" /etc/ssh/sshd_config \
+  && useradd -m ubuntu \
+  && mkdir -p /home/ubuntu/.ssh \
+  && chown -R ubuntu:ubuntu /home/ubuntu/.ssh \
+  && chmod 700 /home/ubuntu/.ssh
+
+COPY --chmod=755 entrypoint.sh /
+
+WORKDIR /
+EXPOSE 22/tcp
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/docker/ci-storage-host/entrypoint.sh b/docker/ci-storage-host/entrypoint.sh
new file mode 100644
index 0000000..f36fb14
--- /dev/null
+++ b/docker/ci-storage-host/entrypoint.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# A container which holds ci-storage saved slots. Its ~ubuntu/ci-storage should
+# be persistent across container restarts.
+#
+set -u -e
+
+if [ "${CI_STORAGE_HOST_SSH_KEY:-}" = "" ]; then
+  echo "CI_STORAGE_HOST_SSH_KEY is not set, exiting..."
+  exit 1
+fi
+
+cd /home/ubuntu
+
+echo "$CI_STORAGE_HOST_SSH_KEY" > .ssh/id_ed25519
+chmod 600 .ssh/id_ed25519
+ssh-keygen -f .ssh/id_ed25519 -y > .ssh/authorized_keys
+chown -R ubuntu:ubuntu .ssh
+
+# This code is for simplifying the CI tests and allow self-hosted-runner to boot
+# in docker-compose. In real world, the 1st slot created should contain the real
+# files (e.g. a cloned git repo).
+if [ ! -e ci-storage ] && [ "${GH_REPOSITORY:-}" != "" ]; then
+  mkdir -p "ci-storage/$GH_REPOSITORY/initial"
+  chown -R ubuntu:ubuntu ci-storage
+fi
+
+mkdir -p /var/run/sshd
+exec /usr/sbin/sshd -D -o ListenAddress=0.0.0.0
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 51d0616..a402b64 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -1,10 +1,23 @@
 version: "3.4"
 services:
-  ci-storage:
+  ci-storage-host:
     build:
-      context: .
-      dockerfile: ./Dockerfile
+      context: ci-storage-host
+      dockerfile: Dockerfile
+    ports:
+      - "10022:22"
+    environment:
+      - GH_REPOSITORY
+      - CI_STORAGE_HOST_SSH_KEY
+  self-hosted-runner:
+    build:
+      context: self-hosted-runner
+      additional_contexts:
+        root: ..
+      dockerfile: Dockerfile
     environment:
       - GH_REPOSITORY
       - GH_LABELS
       - GH_TOKEN
+      - CI_STORAGE_HOST=ci-storage-host
+      - CI_STORAGE_HOST_SSH_KEY
diff --git a/docker/Dockerfile b/docker/self-hosted-runner/Dockerfile
similarity index 84%
rename from docker/Dockerfile
rename to docker/self-hosted-runner/Dockerfile
index ea7b436..9dc5f07 100644
--- a/docker/Dockerfile
+++ b/docker/self-hosted-runner/Dockerfile
@@ -7,21 +7,23 @@ ARG RUNNER_VERSION="2.314.1"
 ENV GH_REPOSITORY=""
 ENV GH_LABELS=""
 ENV GH_TOKEN=""
+ENV CI_STORAGE_HOST=""
+ENV CI_STORAGE_HOST_SSH_KEY=""
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN true \
   && apt-get update -y \
   && apt-get upgrade -y \
   && apt-get install -y --no-install-recommends \
-    awscli jq gh \
+    awscli jq gh rsync openssh-client \
     mc gcc git curl wget pv psmisc unzip vim nano telnet net-tools bash-completion \
-    libssl-dev apt-transport-https build-essential ca-certificates locales pkg-config
-
-RUN true \
+    libssl-dev apt-transport-https build-essential ca-certificates locales pkg-config \
   && useradd -m ubuntu
 
 USER ubuntu
 RUN true \
+  && mkdir -p /home/ubuntu/.ssh \
+  && chmod 700 /home/ubuntu/.ssh \
   && mkdir /home/ubuntu/actions-runner \
   && cd /home/ubuntu/actions-runner \
   && arch=$(dpkg --print-architecture) \
@@ -41,6 +43,7 @@ RUN /home/ubuntu/actions-runner/bin/installdependencies.sh \
 
 USER ubuntu
 COPY --chmod=755 --chown=ubuntu:ubuntu entrypoint.sh /home/ubuntu
+COPY --chmod=755 --from=root ci-storage /usr/bin/ci-storage
 
 WORKDIR /home/ubuntu
 ENTRYPOINT ["./entrypoint.sh"]
@@ -48,4 +51,3 @@ ENTRYPOINT ["./entrypoint.sh"]
 # If overridden in the derived image, evals this as "ubuntu" user as a shell
 # script after config.sh, but before run.sh.
 CMD []
-
diff --git a/docker/self-hosted-runner/README.md b/docker/self-hosted-runner/README.md
new file mode 100644
index 0000000..bcfa6f7
--- /dev/null
+++ b/docker/self-hosted-runner/README.md
@@ -0,0 +1,22 @@
+# Auto-Scaling Self-Hosted Runner Image
+
+You can build an image from this Dockerfile and use it to launch as many
+self-hosted runners as you want. An example scenario:
+
+1. Build an image based off this Dockerfile and publish it. You'll likely want
+   to install some more software into that image (e.g. Node, Python etc.), so it
+   may make sense to extend the base image with your own commands.
+2. Run AWS ECS cluster (with e.g. AWS Fargate) and use the image you just
+   published. Configure its environment variables accordingly: GH_REPOSITORY,
+   GH_LABELS, GH_TOKEN etc. - see details in entrypoint.sh.
+3. Set up auto-scaling rules in the ECS cluster based on the containers' CPU
+   usage. The running containers are safe to shut down at any time if it's done
+   gracefully and with high timeout (to let all the running workflow jobs finish
+   there and de-register the runner).
+4. And here comes the perf magic: when the container first boots, but before it
+   becomes available for the jobs, it pre-initializes its work directory from
+   ci-storage slots storage (see CI_STORAGE_HOST). So when a job is picked up,
+   it already has its work directory pre-created and having most of the build
+   artifacts of someone else. If the job then uses ci-storage GitHub action to
+   restore the files from a slot, it will be very quick, because most of the
+   files are already there.
diff --git a/docker/entrypoint.sh b/docker/self-hosted-runner/entrypoint.sh
similarity index 80%
rename from docker/entrypoint.sh
rename to docker/self-hosted-runner/entrypoint.sh
index 2ae1d98..ddd5ee5 100644
--- a/docker/entrypoint.sh
+++ b/docker/self-hosted-runner/entrypoint.sh
@@ -24,6 +24,22 @@ set -u -e -o xtrace
 cd ./actions-runner
 
 name="ci-storage-$(hostname)"
+repo_name="${GH_REPOSITORY##*/}"
+local_dir=_work/$repo_name/$repo_name
+
+set +o xtrace
+if [ "${CI_STORAGE_HOST_SSH_KEY:-}" != "" ]; then
+  echo "$CI_STORAGE_HOST_SSH_KEY" > ~/.ssh/id_ed25519
+  chmod 600 ~/.ssh/id_ed25519
+fi
+set -o xtrace
+
+if [ "${CI_STORAGE_HOST:-}" != "" ]; then
+  ssh-keyscan -H "$CI_STORAGE_HOST" >> ~/.ssh/known_hosts
+  chmod 600 ~/.ssh/known_hosts
+  mkdir -p "$local_dir"
+  ci-storage --storage-host="$CI_STORAGE_HOST" --storage-dir="~/ci-storage/$GH_REPOSITORY" --slot-id="*" --local-dir="$local_dir" load
+fi
 
 token=$(gh api -X POST --jq .token "repos/$GH_REPOSITORY/actions/runners/registration-token")
 ./config.sh \