From 3ded3302fddc0e46143488f7af0317ad721f5f28 Mon Sep 17 00:00:00 2001
From: Marlon Baeten
Date: Thu, 25 Apr 2024 13:44:46 +0200
Subject: [PATCH] first commit

---
Note: the file contents below were amended during review; the blob ids on
the `index` lines still refer to the original contents.

 .github/workflows/build.yml |  43 +++++++
 Dockerfile                  |  25 ++++
 README.md                   | 104 +++++++++++++++++
 bin/backup.sh               |   6 +
 bin/docker-file-backup.sh   |  56 +++++++++
 bin/docker-psql-backup.sh   | 215 ++++++++++++++++++++++++++++++++++++
 bin/pgurlparams.py          |  35 ++++++
 7 files changed, 484 insertions(+)
 create mode 100644 .github/workflows/build.yml
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100755 bin/backup.sh
 create mode 100755 bin/docker-file-backup.sh
 create mode 100755 bin/docker-psql-backup.sh
 create mode 100755 bin/pgurlparams.py

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..c34acd1
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,43 @@
+name: Create and push a Docker image
+
+on:
+  push:
+    branches: ['main']
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+  VERSION: $GITHUB_SHA
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Create version tag
+        id: version
+        run: echo "tag=$(git show -s --format="%ct-%h" $GITHUB_SHA)" >> $GITHUB_OUTPUT
+
+      - name: Log in to the container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          pull: true
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.version.outputs.tag }}
+          build-args: VERSION=${{ steps.version.outputs.tag }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c5033ac
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+FROM ghcr.io/tweedegolf/debian:bookworm
+
+# Install postgresql client
+ENV POSTGRESQL_VERSION=16
+RUN curl -s -L https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
+    && echo "deb http://apt.postgresql.org/pub/repos/apt/ bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
+    && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+        postgresql-client-$POSTGRESQL_VERSION \
+        bzip2 \
+        python3 \
+    && rm -rf /var/lib/apt/lists/*
+
+# https://github.com/restic/restic/releases
+ENV RESTIC_VERSION=0.16.4
+# install restic, see https://restic.readthedocs.io/en/stable/020_installation.html#official-binaries
+RUN curl -sSLfo /usr/local/bin/restic.bz2 \
+    "https://github.com/restic/restic/releases/download/v${RESTIC_VERSION}/restic_${RESTIC_VERSION}_linux_amd64.bz2" \
+    && bzip2 -d /usr/local/bin/restic.bz2 \
+    && chmod +x /usr/local/bin/restic
+
+# Install backup scripts
+COPY bin/* /usr/local/bin/
+
+# Run the backup when the container starts instead of while building the image
+CMD ["/usr/local/bin/backup.sh"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..34fcf76
--- /dev/null
+++ b/README.md
@@ -0,0 +1,104 @@
+# TGBS
+
+Tweede golf backup service
+
+This image allows you to backup data in a docker/kubernetes environment to a
+restic repository. This image is best suited to be run at scheduled times (e.g.
+as a cron job).
+
+The most basic operation of this image would be to mount some image/disk into
+the container and create a backup from that mount. You can also mount S3 or
+GCS object storage buckets to backup their files. Note that this is not
+recommended for very large buckets.
+
+This image also has an option to connect to a PostgreSQL database and create a
+backup file/directory, and upload that result to a restic repository.
+
+Most of these modes of operation are controlled by environment variables.
+
+## Backup settings
+The listing below contains a short overview of the environment variables
+supported by restic and which ones are required when backing up to a repository
+on Backblaze B2 storage. For details on the environment variables restic
+supports, see [their documentation](https://restic.readthedocs.io/en/stable/040_backup.html#environment-variables).
+
+### RESTIC_REPOSITORY
+The repository url for the backup.
+
+### RESTIC_PASSWORD
+The password to access the repository. In a kubernetes environment this should
+be made available via a secret and not directly in the kubernetes config.
+
+### B2_ACCOUNT_ID
+The account id of the account that has write access to the backblaze repository.
+
+### B2_ACCOUNT_KEY
+The secret account key of the account that has write access to the backblaze
+repository.
+
+### TGBS_BACKUP_LOCK
+If this is set to `1`, the `--no-lock` flag will not be set.
+
+### TGBS_BACKUP_TAGS
+If this is set, then the backup is tagged with the value of this environment
+variable. Different tags can be comma-separated. If the variable is not set,
+then the backup is not tagged.
+
+### TGBS_BACKUP_PATH
+If this is specified, create a backup of the given path (either a directory or
+file).
+
+## PostgreSQL database backup
+To create a PostgreSQL database backup, set the `TGBS_PSQL_BACKUP` to `1`.
+To configure the database connection, use the environment variables available
+to postgresql clients: https://www.postgresql.org/docs/current/libpq-envars.html
+
+Generally you will want to set these environment variables for a simple database
+backup:
+
+    TGBS_PSQL_BACKUP=1
+    PGHOST=somehost
+    PGDATABASE=mydatabase
+    PGUSER=myuser
+    PGPASSWORD=password
+
+Here is a full list of environment variables this image listens for:
+
+### TGBS_PSQL_BACKUP
+Set this variable to `1` to enable backups of PostgreSQL.
+
+### TGBS_PSQL_BACKUP_ALL
+Set this variable to `1` to backup all non-template databases that allow
+connections and on which the user has the `CREATE` privilege, instead of only
+the database selected by `PGDATABASE`.
+
+### TGBS_PSQL_BACKUP_TAGS
+If this is set, this overrides the tags for the PostgreSQL specific part of the
+backup. This variable works the same as the `TGBS_BACKUP_TAGS` variable.
+
+### TGBS_PSQL_BACKUP_JOBS
+Set the number of parallel jobs used to create the backup. By default this will
+be the number of cores available to the backup container. Parallel jobs are only
+supported by the directory format; other formats always use a single job.
+
+### TGBS_PSQL_BACKUP_OWNER
+Set this variable to `1` to backup owner information. This is not done by
+default.
+
+### TGBS_PSQL_BACKUP_PRIVILEGES
+Set this variable to `1` to backup privilege information (grants). This is not
+done by default.
+
+### TGBS_PSQL_BACKUP_FORMAT
+Set this variable to `c` to change the backup format to the custom format, which
+will result in a single file instead of a directory. In most cases the
+directory format is more suited for backup using restic.
+
+### TGBS_PSQL_BACKUP_COMPRESS
+Set the compression level to a number between `0` (no compression) and
+`9` (maximum compression). The default is `9`.
+
+### PGURL
+Instead of specifying the `PGHOST`, `PGPORT`, `PGDATABASE`, `PGUSER` and
+`PGPASSWORD` environment variables individually, you can also specify the
+`PGURL` variable as a (non-standard) alternative.
diff --git a/bin/backup.sh b/bin/backup.sh
new file mode 100755
index 0000000..74760f8
--- /dev/null
+++ b/bin/backup.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+set -eo pipefail
+
+/usr/local/bin/docker-psql-backup.sh
+/usr/local/bin/docker-file-backup.sh
diff --git a/bin/docker-file-backup.sh b/bin/docker-file-backup.sh
new file mode 100755
index 0000000..5e48444
--- /dev/null
+++ b/bin/docker-file-backup.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+set -eo pipefail
+
+echoerr() { echo "$@" 1>&2; }
+
+if [ -n "$TGBS_BACKUP_PATH" ]; then
+    if [ -z "$RESTIC_REPOSITORY" ]; then
+        echoerr "--- ERROR: No restic repository provided, you must provide one"
+        exit 1
+    fi
+    echoerr "--- Creating file/directory backup"
+
+    backup_cmd=( restic backup )
+
+    # Add tags to the backup. The IFS assignment only applies to `read`, so the
+    # rest of the script keeps the default word splitting.
+    TGBS_BACKUP_TAGS_CLEAN=()
+    if [ -n "$TGBS_BACKUP_TAGS" ]; then
+        IFS=',' read -r -a tag_list <<< "$TGBS_BACKUP_TAGS"
+        for tag in "${tag_list[@]}"; do
+            # Strip leading and trailing whitespace
+            tag="${tag#"${tag%%[![:space:]]*}"}"
+            tag="${tag%"${tag##*[![:space:]]}"}"
+            # Ignore empty entries such as the one in "a,,b"
+            if [ -z "$tag" ]; then
+                continue
+            fi
+            backup_cmd+=( --tag "$tag" )
+            TGBS_BACKUP_TAGS_CLEAN+=( "$tag" )
+        done
+    fi
+
+    TGBS_BACKUP_LOCKFILE=true
+    if [ -z "$TGBS_BACKUP_LOCK" ] || [ "$TGBS_BACKUP_LOCK" == "0" ] || [ "$TGBS_BACKUP_LOCK" == "false" ]; then
+        backup_cmd+=( "--no-lock" )
+        TGBS_BACKUP_LOCKFILE=false
+    fi
+
+    backup_cmd+=( "$TGBS_BACKUP_PATH" )
+
+    if [ "${#TGBS_BACKUP_TAGS_CLEAN[@]}" -gt 0 ]; then
+        # Join the tags with commas in a subshell so IFS is not changed here
+        echoerr "--- Using tags for restic snapshot: $(IFS=','; echo "${TGBS_BACKUP_TAGS_CLEAN[*]}")"
+    fi
+    if [ "${TGBS_BACKUP_LOCKFILE}" = false ]; then
+        echoerr "--- WARNING: Not using a lockfile"
+    fi
+    echoerr "--- Creating restic snapshot from $TGBS_BACKUP_PATH to repository $RESTIC_REPOSITORY"
+
+    # Run the restic command. The array is executed directly instead of through
+    # eval, so paths and tags with spaces or quotes reach restic unchanged.
+    "${backup_cmd[@]}"
+else
+    echoerr "--- Not creating file/directory backup"
+fi
diff --git a/bin/docker-psql-backup.sh b/bin/docker-psql-backup.sh
new file mode 100755
index 0000000..668b25a
--- /dev/null
+++ b/bin/docker-psql-backup.sh
@@ -0,0 +1,215 @@
+#!/usr/bin/env bash
+
+set -eo pipefail
+
+MYDIR="$(dirname "$(readlink -f "$0")")"
+
+echoerr() { echo "$@" 1>&2; }
+if [ -n "$TGBS_PSQL_BACKUP" ] && [ "$TGBS_PSQL_BACKUP" != "0" ] && [ "$TGBS_PSQL_BACKUP" != "false" ]; then
+    if [ -z "$RESTIC_REPOSITORY" ]; then
+        echoerr "--- ERROR: No restic repository provided, you must provide one"
+        exit 1
+    fi
+
+    # Convert a postgresql database url to the individual database parameters
+    eval "$("$MYDIR/pgurlparams.py")"
+
+    # Determine the list of databases to backup (one name per line)
+    if [ -n "$TGBS_PSQL_BACKUP_ALL" ] && [ "$TGBS_PSQL_BACKUP_ALL" != "0" ] && [ "$TGBS_PSQL_BACKUP_ALL" != "false" ]; then
+        database_list=$(psql -t -A -c "SELECT datname FROM pg_database WHERE datallowconn = true AND datistemplate = false AND has_database_privilege(datname, 'CREATE');")
+    else
+        if [ -z "$PGDATABASE" ]; then
+            PGDATABASE=$(psql -t -A -c 'SELECT current_database()')
+        fi
+
+        database_list="$PGDATABASE"
+    fi
+
+    # Read the names into an array so the loop below does not depend on IFS
+    # and names are not subject to word splitting or globbing
+    mapfile -t databases <<< "$database_list"
+
+    # Storage for all backup filenames
+    TGBS_PSQL_BACKUP_FILENAMES=()
+
+    # Create the dump directory
+    dump_dir="/tmp/psql"
+    mkdir -p "$dump_dir"
+
+    # Determine the base arguments for the pg_dump command
+    base_args=()
+
+    # Set whether to backup the owner information
+    if [ -z "$TGBS_PSQL_BACKUP_OWNER" ] || [ "$TGBS_PSQL_BACKUP_OWNER" == "0" ] || [ "$TGBS_PSQL_BACKUP_OWNER" == "false" ]; then
+        base_args+=( "--no-owner" )
+        TGBS_PSQL_BACKUP_OWNER=false
+    else
+        TGBS_PSQL_BACKUP_OWNER=true
+    fi
+
+    # Set whether to backup the privilege (grants) information
+    if [ -z "$TGBS_PSQL_BACKUP_PRIVILEGES" ] || [ "$TGBS_PSQL_BACKUP_PRIVILEGES" == "0" ] || [ "$TGBS_PSQL_BACKUP_PRIVILEGES" == "false" ]; then
+        base_args+=( "--no-privileges" )
+        TGBS_PSQL_BACKUP_PRIVILEGES=false
+    else
+        TGBS_PSQL_BACKUP_PRIVILEGES=true
+    fi
+
+    # Determine the compression level and the backup format
+    if [ -z "$TGBS_PSQL_BACKUP_COMPRESS" ]; then
+        TGBS_PSQL_BACKUP_COMPRESS=9
+    fi
+    if ! [[ "$TGBS_PSQL_BACKUP_COMPRESS" =~ ^[+-]?[0-9]+$ ]]; then
+        echoerr "--- WARNING: Compression level must be an integer, setting to 9"
+        TGBS_PSQL_BACKUP_COMPRESS=9
+    fi
+    if [ "$TGBS_PSQL_BACKUP_COMPRESS" -lt 0 ]; then
+        echoerr "--- WARNING: Compression level must be between 0 and 9, setting to 0"
+        TGBS_PSQL_BACKUP_COMPRESS=0
+    fi
+    if [ "$TGBS_PSQL_BACKUP_COMPRESS" -gt 9 ]; then
+        echoerr "--- WARNING: Compression level must be between 0 and 9, setting to 9"
+        TGBS_PSQL_BACKUP_COMPRESS=9
+    fi
+
+    # Determine the backup format
+    if [ -z "$TGBS_PSQL_BACKUP_FORMAT" ]; then
+        TGBS_PSQL_BACKUP_FORMAT="d"
+    fi
+    TGBS_PSQL_BACKUP_FORMAT=$(echo "$TGBS_PSQL_BACKUP_FORMAT" | tr '[:upper:]' '[:lower:]')
+    format_matched=false
+    for fmt in "c" "d" "t" "p"; do
+        if [ "$fmt" == "$TGBS_PSQL_BACKUP_FORMAT" ]; then
+            format_matched=true
+        fi
+    done
+    if [ "$format_matched" == false ]; then
+        echoerr "--- WARNING: Unknown format, using directory format instead"
+        TGBS_PSQL_BACKUP_FORMAT="d"
+    fi
+    if [ "$TGBS_PSQL_BACKUP_FORMAT" == "t" ] || [ "$TGBS_PSQL_BACKUP_FORMAT" == "p" ]; then
+        TGBS_PSQL_BACKUP_COMPRESS=0 # Compression not supported for tar or SQL
+    fi
+    base_args+=( "--format=$TGBS_PSQL_BACKUP_FORMAT" "--compress=$TGBS_PSQL_BACKUP_COMPRESS" )
+
+    # Determine the number of jobs to run
+    proc_count="$(nproc)"
+    if [ "$TGBS_PSQL_BACKUP_FORMAT" != "d" ]; then
+        proc_count=1
+    fi
+    if [ -z "$TGBS_PSQL_BACKUP_JOBS" ]; then
+        TGBS_PSQL_BACKUP_JOBS="$proc_count"
+    fi
+    if ! [[ "$TGBS_PSQL_BACKUP_JOBS" =~ ^[+-]?[0-9]+$ ]]; then
+        echoerr "--- WARNING: Number of jobs must be an integer, setting to $proc_count"
+        TGBS_PSQL_BACKUP_JOBS="$proc_count"
+    fi
+    if [ "$TGBS_PSQL_BACKUP_JOBS" -lt 1 ]; then
+        echoerr "--- WARNING: Must have at least 1 job, setting to 1"
+        TGBS_PSQL_BACKUP_JOBS=1
+    fi
+    # pg_dump only accepts more than one job for the directory format
+    if [ "$TGBS_PSQL_BACKUP_FORMAT" != "d" ] && [ "$TGBS_PSQL_BACKUP_JOBS" -gt 1 ]; then
+        echoerr "--- WARNING: Parallel jobs are only supported by the directory format, setting to 1"
+        TGBS_PSQL_BACKUP_JOBS=1
+    fi
+    base_args+=( "--jobs=$TGBS_PSQL_BACKUP_JOBS" )
+
+    # Run the backup command for each individual database
+    for db in "${databases[@]}"; do
+        # Skip blank lines in the database list
+        if [ -z "$db" ]; then
+            continue
+        fi
+
+        echoerr "--- Creating PostgreSQL backup for database $db"
+        psql_cmd=( "pg_dump" )
+
+        # Switch over the format to determine the filename
+        case "$TGBS_PSQL_BACKUP_FORMAT" in
+            c)
+                TGBS_PSQL_BACKUP_FORMAT_NAME=custom
+                TGBS_PSQL_BACKUP_FILENAME="$dump_dir/$db.dump"
+                ;;
+            d)
+                TGBS_PSQL_BACKUP_FORMAT_NAME=directory
+                TGBS_PSQL_BACKUP_FILENAME="$dump_dir/$db"
+                ;;
+            t)
+                TGBS_PSQL_BACKUP_FORMAT_NAME=tar
+                TGBS_PSQL_BACKUP_FILENAME="$dump_dir/$db.tar"
+                ;;
+            p)
+                TGBS_PSQL_BACKUP_FORMAT_NAME=plain-text
+                TGBS_PSQL_BACKUP_FILENAME="$dump_dir/$db.sql"
+                ;;
+        esac
+        psql_cmd+=( "--file=$TGBS_PSQL_BACKUP_FILENAME" )
+
+        # Add the base arguments
+        psql_cmd+=( "${base_args[@]}" )
+
+        # Notify that we start creating the backup now
+        echoerr "--- Creating a database backup of $db using $TGBS_PSQL_BACKUP_JOBS jobs in format $TGBS_PSQL_BACKUP_FORMAT_NAME"
+        echoerr "--- Saving backup of database $db to $TGBS_PSQL_BACKUP_FILENAME"
+
+        # Run the pg_dump command directly (no eval) so that database names
+        # with spaces or quotes are passed on unchanged
+        PGDATABASE="$db" "${psql_cmd[@]}"
+
+        # Add the filename to the list of files
+        TGBS_PSQL_BACKUP_FILENAMES+=( "$TGBS_PSQL_BACKUP_FILENAME" )
+    done
+
+    if [ "${#TGBS_PSQL_BACKUP_FILENAMES[@]}" -eq 0 ]; then
+        echoerr "--- ERROR: No databases found to backup"
+        exit 1
+    fi
+
+    backup_cmd=( restic backup )
+
+    # Add tags to the backup. The IFS assignment only applies to `read`, so the
+    # rest of the script keeps the default word splitting.
+    TGBS_PSQL_BACKUP_TAGS="${TGBS_PSQL_BACKUP_TAGS:-${TGBS_BACKUP_TAGS}}"
+    TGBS_PSQL_BACKUP_TAGS_CLEAN=()
+    if [ -n "$TGBS_PSQL_BACKUP_TAGS" ]; then
+        IFS=',' read -r -a tag_list <<< "$TGBS_PSQL_BACKUP_TAGS"
+        for tag in "${tag_list[@]}"; do
+            # Strip leading and trailing whitespace
+            tag="${tag#"${tag%%[![:space:]]*}"}"
+            tag="${tag%"${tag##*[![:space:]]}"}"
+            # Ignore empty entries such as the one in "a,,b"
+            if [ -z "$tag" ]; then
+                continue
+            fi
+            backup_cmd+=( --tag "$tag" )
+            TGBS_PSQL_BACKUP_TAGS_CLEAN+=( "$tag" )
+        done
+    fi
+
+    # Add option for the --no-lock command line flag for restic added by tg
+    TGBS_PSQL_BACKUP_LOCKFILE=true
+    if [ -z "$TGBS_BACKUP_LOCK" ] || [ "$TGBS_BACKUP_LOCK" == "0" ] || [ "$TGBS_BACKUP_LOCK" == "false" ]; then
+        backup_cmd+=( "--no-lock" )
+        TGBS_PSQL_BACKUP_LOCKFILE=false
+    fi
+
+    # Backup all previously created psql files
+    backup_cmd+=( "${TGBS_PSQL_BACKUP_FILENAMES[@]}" )
+
+    if [ "${#TGBS_PSQL_BACKUP_TAGS_CLEAN[@]}" -gt 0 ]; then
+        # Join the tags with commas in a subshell so IFS is not changed here
+        echoerr "--- Using tags for restic snapshot: $(IFS=','; echo "${TGBS_PSQL_BACKUP_TAGS_CLEAN[*]}")"
+    fi
+    if [ "${TGBS_PSQL_BACKUP_LOCKFILE}" = false ]; then
+        echoerr "--- WARNING: Not using a lockfile"
+    fi
+    echoerr "--- Creating restic snapshot from PostgreSQL backup to repository $RESTIC_REPOSITORY"
+
+    # Run the restic command directly (no eval) to keep arguments intact
+    "${backup_cmd[@]}"
+
+    echoerr "--- PostgreSQL backup complete"
+else
+    echoerr "--- Not creating PostgreSQL backup"
+fi
diff --git a/bin/pgurlparams.py b/bin/pgurlparams.py
new file mode 100755
index 0000000..bae14a7
--- /dev/null
+++ b/bin/pgurlparams.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+"""Print shell `export` statements for the PostgreSQL connection settings.
+
+When `PGURL` is set and uses one of the accepted schemes, its components
+override the matching `PG*` environment variables. The output is meant to be
+evaluated by the calling shell script, so every value is shell-quoted.
+"""
+from urllib.parse import urlparse
+import os
+import shlex
+
+PG_SCHEMES = ('pg', 'postgres', 'pgsql', 'psql', 'postgresql')
+PG_VARIABLES = ('PGHOST', 'PGUSER', 'PGPASSWORD', 'PGPORT', 'PGDATABASE')
+
+if 'PGURL' in os.environ:
+    result = urlparse(os.environ['PGURL'])
+    if result.scheme in PG_SCHEMES:
+        # hostname is None when the url has no host part; os.environ only
+        # accepts strings, so leave PGHOST untouched in that case
+        if result.hostname is not None:
+            os.environ['PGHOST'] = result.hostname
+        if result.username is not None:
+            os.environ['PGUSER'] = result.username
+        if result.password is not None:
+            os.environ['PGPASSWORD'] = result.password
+        if result.port is not None:
+            os.environ['PGPORT'] = "{}".format(result.port)
+        if len(result.path) > 1 and result.path[0] == '/':
+            os.environ['PGDATABASE'] = result.path[1:]
+
+for name in PG_VARIABLES:
+    if name in os.environ:
+        # shlex.quote stops quotes, `$` and backticks in a value from being
+        # interpreted when the shell evaluates this output
+        print("export {}={}".format(name, shlex.quote(os.environ[name])))