From bc880b319ffed7c00f85994dc0bf48a567903f69 Mon Sep 17 00:00:00 2001 From: Tyler Gillson Date: Tue, 11 Jun 2024 15:35:44 -0600 Subject: [PATCH] PAD-173: 2 node HA - Tech Preview (#131) * PAD-156: Add custom health check script * PAD-156: Update user-data template, include health-check-script * Add E2E automation, debug helpers (#94) * automate e2e provisioning * chore: bump provider-k3s version --------- Signed-off-by: Oz Tiram Signed-off-by: Tyler Gillson Signed-off-by: Oz N Tiram Co-authored-by: Oz Tiram --- .gitignore | 6 + Dockerfile | 14 +- Earthfile | 66 +- hack/Earthfile | 16 + hack/README.md | 19 + hack/build/.keep | 0 hack/launch-qemu.sh | 25 + .../opt/spectrocloud/bin/check-disk-size.sh | 15 + test/env.example | 42 ++ .../two-node-cluster-profile.json.tmpl | 64 ++ .../two-node-master-master.json.tmpl | 109 ++++ test/templates/two-node-update.json.tmpl | 44 ++ test/test-two-node.sh | 575 ++++++++++++++++++ 13 files changed, 974 insertions(+), 21 deletions(-) create mode 100644 hack/Earthfile create mode 100644 hack/README.md create mode 100644 hack/build/.keep create mode 100755 hack/launch-qemu.sh create mode 100755 overlay/files/opt/spectrocloud/bin/check-disk-size.sh create mode 100644 test/env.example create mode 100644 test/templates/two-node-cluster-profile.json.tmpl create mode 100644 test/templates/two-node-master-master.json.tmpl create mode 100644 test/templates/two-node-update.json.tmpl create mode 100755 test/test-two-node.sh diff --git a/.gitignore b/.gitignore index 8995b27..c67f2b5 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,12 @@ content-*/* *.arg .idea .DS_Store + +hack/*.img +test/.env +two-node-create.json +two-node-update.json + build/ local/ keys/ diff --git a/Dockerfile b/Dockerfile index cc80b2a..db0dbf3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ RUN if [ "${OS_DISTRIBUTION}" = "opensuse-leap" ] && [ "${PROXY_CERT_PATH}" != " update-ca-certificates; \ fi -###########################Add any other image 
customizations here ####################### +########################### Add any other image customizations here ####################### #### Examples #### @@ -37,8 +37,18 @@ RUN if [ "${OS_DISTRIBUTION}" = "opensuse-leap" ] && [ "${PROXY_CERT_PATH}" != " # && apt-get clean # RUN apt-get update && apt-get install nginx -y -### or ### To install the nginx package for opensuse ### # RUN zypper refresh && zypper install nginx -y + +### To add a custom health script for two-node liveness checks ### + +# ADD overlay/files/opt/spectrocloud/bin/check-disk-size.sh /opt/spectrocloud/bin/ + +### To install wifi prerequisites for Ubuntu ### + +# RUN apt-get update && apt-get install wpasupplicant -y && \ +# apt-get update && apt-get install network-manager -y && \ +# apt-get install iputils-ping -y && \ +# mkdir /var/lib/wpa \ No newline at end of file diff --git a/Earthfile b/Earthfile index 5cc98f6..b91ed9f 100644 --- a/Earthfile +++ b/Earthfile @@ -2,7 +2,7 @@ VERSION 0.6 ARG TARGETOS ARG TARGETARCH -## Default Image Repos Used in the Builds. +# Default image repositories used in the builds. ARG ALPINE_IMG=gcr.io/spectro-images-public/alpine:3.16.2 ARG SPECTRO_PUB_REPO=gcr.io/spectro-images-public ARG SPECTRO_LUET_REPO=gcr.io/spectro-dev-public @@ -10,7 +10,7 @@ ARG KAIROS_BASE_IMAGE_URL=gcr.io/spectro-images-public ARG ETCD_REPO=https://github.com/etcd-io FROM $SPECTRO_PUB_REPO/canvos/alpine-cert:v1.0.0 -## Spectro Cloud and Kairos Tags ## +# Spectro Cloud and Kairos tags. ARG PE_VERSION=v4.4.1 ARG SPECTRO_LUET_VERSION=v1.3.1 ARG KAIROS_VERSION=v3.0.11 @@ -19,11 +19,11 @@ ARG RKE2_FLAVOR_TAG=rke2r1 ARG BASE_IMAGE_URL=quay.io/kairos ARG OSBUILDER_VERSION=v0.201.0 ARG OSBUILDER_IMAGE=quay.io/kairos/osbuilder-tools:$OSBUILDER_VERSION -ARG K3S_PROVIDER_VERSION=v4.4.0 +ARG K3S_PROVIDER_VERSION=v4.4.1 ARG KUBEADM_PROVIDER_VERSION=v4.4.0 ARG RKE2_PROVIDER_VERSION=v4.4.0 -# Variables used in the builds. 
Update for ADVANCED use cases only Modify in .arg file or via CLI arguements +# Variables used in the builds. Update for ADVANCED use cases only. Modify in .arg file or via CLI arguments. ARG OS_DISTRIBUTION ARG OS_VERSION ARG K8S_VERSION @@ -48,6 +48,11 @@ ARG no_proxy=${NO_PROXY} ARG PROXY_CERT_PATH ARG UPDATE_KERNEL=false +ARG ETCD_VERSION="v3.5.13" + +# Two node variables +ARG TWO_NODE=false +ARG KINE_VERSION=0.11.4 # UKI Variables ARG IS_UKI=false @@ -57,7 +62,6 @@ ARG UKI_BRING_YOUR_OWN_KEYS=false ARG CMDLINE="stylus.registration" ARG BRANDING="Palette eXtended Kubernetes Edge" -ARG ETCD_VERSION="v3.5.13" # EFI size check ARG EFI_MAX_SIZE=2048 @@ -530,7 +534,7 @@ secure-boot-dirs: RUN chmod 0644 /secure-boot/public-keys SAVE ARTIFACT --keep-ts /secure-boot AS LOCAL ./secure-boot -# Used to create the provider images. The --K8S_VERSION will be passed in the earthly build +# Used to create the provider images. The --K8S_VERSION will be passed in the earthly build. provider-image: FROM --platform=linux/${ARCH} +base-image # added PROVIDER_K8S_VERSION to fix missing image in ghcr.io/kairos-io/provider-* @@ -572,8 +576,30 @@ provider-image: RUN touch /etc/machine-id \ && chmod 444 /etc/machine-id - SAVE IMAGE --push $IMAGE_PATH + IF $TWO_NODE + # Install postgresql 16 + IF [ "$OS_DISTRIBUTION" = "ubuntu" ] && [ "$ARCH" = "amd64" ] + RUN apt install -y ca-certificates curl && \ + install -d /usr/share/postgresql-common/pgdg && \ + curl -o /usr/share/postgresql-common/pgdg/apt.postgresql.org.asc --fail https://www.postgresql.org/media/keys/ACCC4CF8.asc && \ + echo "deb [signed-by=/usr/share/postgresql-common/pgdg/apt.postgresql.org.asc] https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \ + apt update && \ + apt install -y postgresql-16 postgresql-contrib-16 iputils-ping + ELSE IF [ "$OS_DISTRIBUTION" = "opensuse-leap" ] && [ "$ARCH" = "amd64" ] + RUN zypper --non-interactive --quiet addrepo --refresh -p 90 
http://download.opensuse.org/repositories/server:database:postgresql/openSUSE_Tumbleweed/ PostgreSQL && \ + zypper --gpg-auto-import-keys ref && \ + zypper install -y postgresql-16 postgresql-server-16 postgresql-contrib iputils + END + + # Install kine + RUN mkdir -p /opt/spectrocloud/bin && \ + curl -L https://github.com/k3s-io/kine/releases/download/v${KINE_VERSION}/kine-amd64 | install -m 755 /dev/stdin /opt/spectrocloud/bin/kine + + # Ensure psql works ootb for the postgres user + RUN su postgres -c 'echo "export PERL5LIB=/usr/share/perl/5.34:/usr/share/perl5:/usr/lib/x86_64-linux-gnu/perl/5.34" > ~/.bash_profile' + END + SAVE IMAGE --push $IMAGE_PATH provider-image-rootfs: FROM --platform=linux/${ARCH} +provider-image @@ -647,6 +673,7 @@ base-image: ARG BASE_K8S_VERSION=$K8S_VERSION-$K8S_DISTRIBUTION_TAG END + # OS == Ubuntu IF [ "$OS_DISTRIBUTION" = "ubuntu" ] && [ "$ARCH" = "amd64" ] IF [ ! -z "$UBUNTU_PRO_KEY" ] RUN sed -i '/^[[:space:]]*$/d' /etc/os-release && \ @@ -696,27 +723,27 @@ base-image: RUN pro detach --assume-yes END - # IF OS Type is Opensuse + # OS == Opensuse ELSE IF [ "$OS_DISTRIBUTION" = "opensuse-leap" ] && [ "$ARCH" = "amd64" ] # Add proxy certificate if present IF [ ! 
-z $PROXY_CERT_PATH ] COPY sc.crt /usr/share/pki/trust/anchors - RUN update-ca-certificates + RUN update-ca-certificates END # Enable or Disable Kernel Updates IF [ "$UPDATE_KERNEL" = "false" ] RUN zypper al kernel-de* END - RUN zypper refresh && \ - zypper update -y + RUN zypper refresh && zypper update -y - IF [ -e "/usr/bin/dracut" ] - RUN --no-cache kernel=$(ls /lib/modules | tail -n1) && depmod -a "${kernel}" - RUN --no-cache kernel=$(ls /lib/modules | tail -n1) && dracut -f "/boot/initrd-${kernel}" "${kernel}" && ln -sf "initrd-${kernel}" /boot/initrd - END - RUN zypper install -y zstd vim iputils bridge-utils curl ethtool tcpdump - RUN zypper cc && \ + IF [ -e "/usr/bin/dracut" ] + RUN --no-cache kernel=$(ls /lib/modules | tail -n1) && depmod -a "${kernel}" + RUN --no-cache kernel=$(ls /lib/modules | tail -n1) && dracut -f "/boot/initrd-${kernel}" "${kernel}" && ln -sf "initrd-${kernel}" /boot/initrd + END + + RUN zypper install -y zstd vim iputils bridge-utils curl ethtool tcpdump && \ + zypper cc && \ zypper clean END @@ -726,6 +753,7 @@ base-image: zypper clean RUN if [ ! -e /usr/bin/apparmor_parser ]; then cp /sbin/apparmor_parser /usr/bin/apparmor_parser; fi END + IF [ "$ARCH" = "arm64" ] ARG LUET_REPO=luet-repo-arm ELSE IF [ "$ARCH" = "amd64" ] @@ -738,7 +766,7 @@ base-image: RUN --no-cache if [ -f spectro-luet-auth.yaml ]; then cat spectro-luet-auth.yaml >> /etc/luet/repos.conf.d/spectro.yaml; fi RUN --no-cache luet repo update - IF [ "$OS_DISTRIBUTION" = "rhel" ] + IF [ "$OS_DISTRIBUTION" = "rhel" ] RUN yum install -y openssl END @@ -762,7 +790,7 @@ base-image: if grep "selinux=1" /etc/cos/bootargs.cfg > /dev/null; then sed -i 's/selinux=1/selinux=0/g' /etc/cos/bootargs.cfg; fi END -# Used to build the installer image. The installer ISO will be created from this. +# Used to build the installer image. The installer ISO will be created from this. 
iso-image: FROM --platform=linux/${ARCH} +base-image IF [ "$IS_UKI" = "false" ] diff --git a/hack/Earthfile b/hack/Earthfile new file mode 100644 index 0000000..c3118fa --- /dev/null +++ b/hack/Earthfile @@ -0,0 +1,16 @@ +VERSION 0.6 + +ARG OSBUILDER_VERSION=v0.7.11 +ARG OSBUILDER_IMAGE=quay.io/kairos/osbuilder-tools:$OSBUILDER_VERSION +ARG ISO_NAME=debug + +# replace with your CanvOS provider image +ARG PROVIDER_IMAGE=oci:tylergillson/ubuntu:k3s-1.26.4-v4.0.4-071c2c23 + +build: + FROM $OSBUILDER_IMAGE + WORKDIR /build + COPY . ./ + + RUN /entrypoint.sh --name $ISO_NAME --debug build-iso --squash-no-compression --date=false $PROVIDER_IMAGE --output /build/ + SAVE ARTIFACT /build/$ISO_NAME.iso kairos.iso AS LOCAL build/$ISO_NAME.iso diff --git a/hack/README.md b/hack/README.md new file mode 100644 index 0000000..4557131 --- /dev/null +++ b/hack/README.md @@ -0,0 +1,19 @@ +# Debugging Kairos + +If you're facing hard-to-diagnose issues with your custom provider image, you can use the scripts in this directory to obtain verbose Kairos output. + +## Steps +1. Use earthly to generate an ISO from your CanvOS provider image: + ``` + earthly +build --PROVIDER_IMAGE= # e.g., oci:tylergillson/ubuntu:k3s-1.26.4-v4.0.4-071c2c23 + ``` + If successful, `build/debug.iso` will be created. + +2. Launch a local VM based on the debug ISO using QEMU and pipe all output to a log file: + ``` + ./launch-qemu.sh build/debug.iso | tee out.log + ``` + +3. Boot the VM in `Kairos (manual)` mode. Once booted, create `userdata.yaml` with your desired Kairos config and execute a manual Kairos installation: `kairos-agent --debug manual-install --device auto userdata.yaml`. + +4. The VM should eventually reboot itself once the installation completes. Rather than waiting, execute `reboot` to return to the GRUB menu, select `Palette eXtended Kubernetes Edge` and hit `e` to edit it. Add `rd.debug rd.immucore.debug` to the end of the `linux` line, then hit `CTRL+x` to boot with your edits. 
You should see verbose Kairos debug logs and they will be persisted to `out.log`. diff --git a/hack/build/.keep b/hack/build/.keep new file mode 100644 index 0000000..e69de29 diff --git a/hack/launch-qemu.sh b/hack/launch-qemu.sh new file mode 100755 index 0000000..9aaba67 --- /dev/null +++ b/hack/launch-qemu.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Screenshot capability: +# https://unix.stackexchange.com/a/476617 + +if [ ! -e disk.img ]; then + qemu-img create -f qcow2 disk.img 60g +fi + +# -nic bridge,br=br0,model=virtio-net-pci \ +qemu-system-x86_64 \ + -enable-kvm \ + -cpu "${CPU:=host}" \ + -nographic \ + -spice port=9000,addr=127.0.0.1,disable-ticketing=yes \ + -m ${MEMORY:=10096} \ + -smp ${CORES:=5} \ + -monitor unix:/tmp/qemu-monitor.sock,server=on,wait=off \ + -serial mon:stdio \ + -rtc base=utc,clock=rt \ + -chardev socket,path=qga.sock,server=on,wait=off,id=qga0 \ + -device virtio-serial \ + -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0 \ + -drive if=virtio,media=disk,file=disk.img \ + -drive if=ide,media=cdrom,file="${1}" diff --git a/overlay/files/opt/spectrocloud/bin/check-disk-size.sh b/overlay/files/opt/spectrocloud/bin/check-disk-size.sh new file mode 100755 index 0000000..6dab5a6 --- /dev/null +++ b/overlay/files/opt/spectrocloud/bin/check-disk-size.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -e + +REQUIRED_FREE_DISK=$1 + +FREE=$((100 - $(df -h --output=pcent /var/ | tail -n 1 | tr -d '\% '))) # df pcent reports USED %; convert to free % + +if (( $FREE < $REQUIRED_FREE_DISK )); then + echo "Not enough free disk, required: $1. Free: $FREE" + exit 1 +fi + +echo "Free disk ok, required: $1. 
Free: $FREE" +exit 0 diff --git a/test/env.example b/test/env.example new file mode 100644 index 0000000..6de9f39 --- /dev/null +++ b/test/env.example @@ -0,0 +1,42 @@ +# govc vars +export GOVC_USERNAME=@vsphere.local +export GOVC_PASSWORD= +export GOVC_URL=10.10.128.10 # IP address of USDC; edit as needed +export GOVC_INSECURE=true +export GOVC_DATACENTER=Datacenter +export GOVC_DATASTORE=vsanDatastore2 +export GOVC_NETWORK=VM-NETWORK +export GOVC_RESOURCE_POOL= +export GOVC_FOLDER= + +# vSphere vars +export HOST_SUFFIX=-$(git -C ../stylus describe --always) # required to ensure unique edge host IDs +export ISO_FOLDER= # e.g. "ISO/01-tyler" +export STYLUS_ISO="${ISO_FOLDER}/stylus-dev-amd64.iso" +export NIC_NAME=ens160 + +# networking vars +export PROXY= # set any value to use Spectro's basic proxy +export WIFI_NETWORK= +export WIFI_PASSWORD= + +# palette vars +export API_KEY= +export PROJECT_UID= +export EDGE_REGISTRATION_TOKEN= +export DOMAIN=dev.spectrocloud.com +export PUBLIC_PACK_REPO_UID= # this varies per Palette tenant, identify via Chrome inspector on Tenant Admin -> Pack Registries page +export CLUSTER_NAME=two-node--$(git -C ../stylus describe --always) +export CLUSTER_PROFILE_UID= # if left blank, a cluster profile will be created +export CLUSTER_VIP= # choose an unassigned VIP + +# image vars +export EARTHLY_BUILDKIT_CACHE_SIZE_MB=500000 +export OCI_REGISTRY=${OCI_REGISTRY:-ttl.sh} +export STYLUS_BRANCH=${STYLUS_BRANCH:-2-node} +export PROVIDER_K3S_BRANCH=${PROVIDER_K3S_BRANCH:-two-node} +export K3S_VERSION="1.28.5" +export PE_VERSION="4.3.0-2node" + +# two node vars +export TWO_NODE_BACKEND=postgres diff --git a/test/templates/two-node-cluster-profile.json.tmpl b/test/templates/two-node-cluster-profile.json.tmpl new file mode 100644 index 0000000..5f68898 --- /dev/null +++ b/test/templates/two-node-cluster-profile.json.tmpl @@ -0,0 +1,64 @@ +{ + "metadata": { + "name": "_____place_holder_____", + "description": "", + "labels": {} + }, + "spec": { + 
"version": "1.0.0", + "template": { + "type": "infra", + "cloudType": "edge-native", + "packs": [ + { + "name": "edge-native-byoi", + "type": "spectro", + "layer": "os", + "version": "1.0.0", + "tag": "1.0.0", + "values": "pack:\n content:\n images:\n - image: \"{{.spectro.pack.edge-native-byoi.options.system.uri}}\"\n # Below config is default value, please uncomment if you want to modify default values\n #drain:\n #cordon: true\n #timeout: 60 # The length of time to wait before giving up, zero means infinite\n #gracePeriod: 60 # Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used\n #ignoreDaemonSets: true\n #deleteLocalData: true # Continue even if there are pods using emptyDir (local data that will be deleted when the node is drained)\n #force: true # Continue even if there are pods that do not declare a controller\n #disableEviction: false # Force drain to use delete, even if eviction is supported. This will bypass checking PodDisruptionBudgets, use with caution\n #skipWaitForDeleteTimeout: 60 # If pod DeletionTimestamp older than N seconds, skip waiting for the pod. 
Seconds must be greater than 0 to skip.\nstylusPackage: container://OCI_REGISTRY/stylus-linux-amd64:v0.0.0-STYLUS_HASH\noptions:\n system.uri: \"OCI_REGISTRY/ubuntu:k3s-K3S_VERSION-vPE_VERSION-STYLUS_HASH\"", + "registry": { + "metadata": { + "uid": "_____place_holder_____", + "name": "Public Repo", + "kind": "pack", + "isPrivate": false + } + } + }, + { + "name": "edge-k3s", + "type": "spectro", + "layer": "k8s", + "version": "_____place_holder_____", + "tag": "_____place_holder_____", + "values": "cluster:\n config: |\n flannel-backend: host-gw\n disable-network-policy: true\n disable:\n - traefik\n - local-storage\n - servicelb\n - metrics-server\n\n # configure the pod cidr range\n cluster-cidr: \"192.170.0.0/16\"\n\n # configure service cidr range\n service-cidr: \"192.169.0.0/16\"\n\n # etcd snapshot frequency and number of snapshot retained\n etcd-snapshot-schedule-cron: 0 */1 * * *\n etcd-snapshot-retention: 12\n\n # kubeconfig must be in run for the stylus operator to manage the cluster\n write-kubeconfig: /run/kubeconfig\n write-kubeconfig-mode: 600\n\n # additional component settings to harden installation\n kube-apiserver-arg:\n - anonymous-auth=true\n - profiling=false\n - disable-admission-plugins=AlwaysAdmit\n - default-not-ready-toleration-seconds=20\n - default-unreachable-toleration-seconds=20\n - enable-admission-plugins=AlwaysPullImages,NamespaceLifecycle,ServiceAccount,NodeRestriction,DefaultTolerationSeconds\n - audit-log-path=/var/log/apiserver/audit.log\n - audit-policy-file=/etc/kubernetes/audit-policy.yaml\n - audit-log-maxage=30\n - audit-log-maxbackup=10\n - audit-log-maxsize=100\n - authorization-mode=RBAC,Node\n - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256\n 
kube-controller-manager-arg:\n - profiling=false\n - terminated-pod-gc-threshold=25\n - use-service-account-credentials=true\n - feature-gates=RotateKubeletServerCertificate=true\n - node-monitor-period=5s\n - node-monitor-grace-period=20s\n kube-scheduler-arg:\n - profiling=false\n kubelet-arg:\n - read-only-port=0\n - event-qps=0\n - feature-gates=RotateKubeletServerCertificate=true\n - protect-kernel-defaults=true\n - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256\n - rotate-server-certificates=true\nstages:\n initramfs:\n - sysctl:\n vm.overcommit_memory: 1\n kernel.panic: 10\n kernel.panic_on_oops: 1\n kernel.printk: \"0 4 0 7\"\n - directories:\n - path: \"/var/log/apiserver\"\n permissions: 0644\n files:\n - path: /etc/hosts\n permission: \"0644\"\n content: |\n 127.0.0.1 localhost\n - path: \"/etc/kubernetes/audit-policy.yaml\"\n owner_string: \"root\"\n permission: 0600\n content: |\n apiVersion: audit.k8s.io/v1\n kind: Policy\n rules:\n - level: None\n users: [\"system:kube-proxy\"]\n verbs: [\"watch\"]\n resources:\n - group: \"\" # core\n resources: [\"endpoints\", \"services\", \"services/status\"]\n - level: None\n users: [\"system:unsecured\"]\n namespaces: [\"kube-system\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"configmaps\"]\n - level: None\n users: [\"kubelet\"] # legacy kubelet identity\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes\", \"nodes/status\"]\n - level: None\n userGroups: [\"system:nodes\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes\", \"nodes/status\"]\n - level: None\n users:\n - system:kube-controller-manager\n - system:kube-scheduler\n - 
system:serviceaccount:kube-system:endpoint-controller\n verbs: [\"get\", \"update\"]\n namespaces: [\"kube-system\"]\n resources:\n - group: \"\" # core\n resources: [\"endpoints\"]\n - level: None\n users: [\"system:apiserver\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"namespaces\", \"namespaces/status\", \"namespaces/finalize\"]\n - level: None\n users: [\"cluster-autoscaler\"]\n verbs: [\"get\", \"update\"]\n namespaces: [\"kube-system\"]\n resources:\n - group: \"\" # core\n resources: [\"configmaps\", \"endpoints\"]\n # Don't log HPA fetching metrics.\n - level: None\n users:\n - system:kube-controller-manager\n verbs: [\"get\", \"list\"]\n resources:\n - group: \"metrics.k8s.io\"\n # Don't log these read-only URLs.\n - level: None\n nonResourceURLs:\n - /healthz*\n - /version\n - /swagger*\n # Don't log events requests.\n - level: None\n resources:\n - group: \"\" # core\n resources: [\"events\"]\n # node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes\n - level: Request\n users: [\"kubelet\", \"system:node-problem-detector\", \"system:serviceaccount:kube-system:node-problem-detector\"]\n verbs: [\"update\",\"patch\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes/status\", \"pods/status\"]\n omitStages:\n - \"RequestReceived\"\n - level: Request\n userGroups: [\"system:nodes\"]\n verbs: [\"update\",\"patch\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes/status\", \"pods/status\"]\n omitStages:\n - \"RequestReceived\"\n # deletecollection calls can be large, don't log responses for expected namespace deletions\n - level: Request\n users: [\"system:serviceaccount:kube-system:namespace-controller\"]\n verbs: [\"deletecollection\"]\n omitStages:\n - \"RequestReceived\"\n # Secrets, ConfigMaps, and TokenReviews can contain sensitive \u0026 binary data,\n # so only log at the Metadata level.\n - level: Metadata\n resources:\n - group: \"\" # 
core\n resources: [\"secrets\", \"configmaps\"]\n - group: authentication.k8s.io\n resources: [\"tokenreviews\"]\n omitStages:\n - \"RequestReceived\"\n # Get repsonses can be large; skip them.\n - level: Request\n verbs: [\"get\", \"list\", \"watch\"]\n resources:\n - group: \"\" # core\n - group: \"admissionregistration.k8s.io\"\n - group: \"apiextensions.k8s.io\"\n - group: \"apiregistration.k8s.io\"\n - group: \"apps\"\n - group: \"authentication.k8s.io\"\n - group: \"authorization.k8s.io\"\n - group: \"autoscaling\"\n - group: \"batch\"\n - group: \"certificates.k8s.io\"\n - group: \"extensions\"\n - group: \"metrics.k8s.io\"\n - group: \"networking.k8s.io\"\n - group: \"policy\"\n - group: \"rbac.authorization.k8s.io\"\n - group: \"settings.k8s.io\"\n - group: \"storage.k8s.io\"\n omitStages:\n - \"RequestReceived\"\n # Default level for known APIs\n - level: RequestResponse\n resources:\n - group: \"\" # core\n - group: \"admissionregistration.k8s.io\"\n - group: \"apiextensions.k8s.io\"\n - group: \"apiregistration.k8s.io\"\n - group: \"apps\"\n - group: \"authentication.k8s.io\"\n - group: \"authorization.k8s.io\"\n - group: \"autoscaling\"\n - group: \"batch\"\n - group: \"certificates.k8s.io\"\n - group: \"extensions\"\n - group: \"metrics.k8s.io\"\n - group: \"networking.k8s.io\"\n - group: \"policy\"\n - group: \"rbac.authorization.k8s.io\"\n - group: \"settings.k8s.io\"\n - group: \"storage.k8s.io\"\n omitStages:\n - \"RequestReceived\"\n # Default level for all other requests.\n - level: Metadata\n omitStages:\n - \"RequestReceived\"\npack:\n palette:\n config:\n oidc:\n identityProvider: noauth", + "registry": { + "metadata": { + "uid": "_____place_holder_____", + "name": "Public Repo", + "kind": "pack", + "isPrivate": false + } + } + }, + { + "name": "cni-custom", + "type": "spectro", + "layer": "cni", + "version": "0.1.0", + "tag": "0.1.0", + "values": "manifests:\n byo-cni:\n contents: |\n apiVersion: v1\n kind: ConfigMap\n metadata:\n name: 
custom-cni\n data:\n # property-like keys; each key maps to a simple value\n custom-cni: \"byo-cni\"", + "registry": { + "metadata": { + "uid": "_____place_holder_____", + "name": "Public Repo", + "kind": "pack", + "isPrivate": false + } + } + } + ] + } + } +} diff --git a/test/templates/two-node-master-master.json.tmpl b/test/templates/two-node-master-master.json.tmpl new file mode 100644 index 0000000..1dcd625 --- /dev/null +++ b/test/templates/two-node-master-master.json.tmpl @@ -0,0 +1,109 @@ +{ + "metadata": { + "annotations": {}, + "name": "_____place_holder_____", + "labels": {} + }, + "spec": { + "cloudConfig": { + "controlPlaneEndpoint": { + "host": "_____place_holder_____", + "type": "IP" + }, + "sshKeys": [ + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDDYZpId/d19xuzNnbjkWxlTvctATcwYz+Fre3qOUkkrFJljx39pduukR38Pms8oeEPk6B+GBzwARk9xkEK2SUW+B6ZzCVaBXMHzLiuyzYK9mcHLEnSaYIT7njdAqcFzpBamkPkhUAfsWDcsjgnz0Q7Ilmdi42MW1mqR9M+FibB89Qg/EdFxD0J+VtD/MOZfSDPMEQ+azZMgcWRICn1N9Ods3uH8FCF+PAwVEBo19x34P5xqIyZ7QJjFvNoV96Sr8JuUJWXzMJ6R+7HbH5BMceRsDVd+ZUSX5tQDG4nPrWRVdJN3stLtLNADprXV5BSrDaMOqWK034Or4AI+sqTvmHIBy/b0U4dWAQiJWD6QkLG673UG2qwyZ4GJI4D0KkR7Frj2zwtcufnwHop69R36uJn5xkjJUG92B5GbfolbSjzo0PsQ+Q5NKRJDZZ7conw5RkRb4DYrt17D6BZKbw0X5Gd22MdgPPcnjs4JiZTeKXGkM0kDlTD5jjA4nCs6IEQhI1QLiicHLO5algTf1JHyRUgdMbJA0zlVITDtid3cvRup3JpZW9cdxu3NTqsRRauZj33mfpeRLnuJ2y+cLaWBkkAPpjO87/caUezJJ0r3qzXkIXLu4zCe1RRoZfERUlGvLK+LRUC8IadFTGJl6UhJBApe1UydydOakK45uUBAkDYfw== spectro2023" + ], + "staticIp": false + }, + "machinePoolConfig": [ + { + "cloudConfig": { + "edgeHosts": [ + { + "hostName": "_____place_holder_____", + "hostUid": "_____place_holder_____", + "nicName": "_____place_holder_____", + "staticIP": "", + "twoNodeCandidatePriority": "primary" + }, + { + "hostName": "_____place_holder_____", + "hostUid": "_____place_holder_____", + "nicName": "_____place_holder_____", + "staticIP": "", + "twoNodeCandidatePriority": "secondary" + } + ] + }, + "poolConfig": { + "name": "master-pool", + "labels": 
[ + "master" + ], + "isControlPlane": true, + "useControlPlaneAsWorker": true, + "taints": [], + "additionalLabels": {}, + "nodeRepaveInterval": 0, + "updateStrategy": { + "type": "RollingUpdateScaleOut" + }, + "machinePoolProperties": { + "archType": "amd64" + }, + "size": 2, + "maxSize": 2, + "minSize": 2 + } + } + ], + "cloudAccountUid": null, + "edgeHostUid": "", + "profiles": [ + { + "uid": "_____place_holder_____", + "packValues": [ + { + "tag": "1.0.0", + "name": "edge-native-byoi", + "type": "spectro", + "values": "pack:\n content:\n images:\n - image: \"{{.spectro.pack.edge-native-byoi.options.system.uri}}\"\n # Below config is default value, please uncomment if you want to modify default values\n #drain:\n #cordon: true\n #timeout: 60 # The length of time to wait before giving up, zero means infinite\n #gracePeriod: 60 # Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used\n #ignoreDaemonSets: true\n #deleteLocalData: true # Continue even if there are pods using emptyDir (local data that will be deleted when the node is drained)\n #force: true # Continue even if there are pods that do not declare a controller\n #disableEviction: false # Force drain to use delete, even if eviction is supported. This will bypass checking PodDisruptionBudgets, use with caution\n #skipWaitForDeleteTimeout: 60 # If pod DeletionTimestamp older than N seconds, skip waiting for the pod. 
Seconds must be greater than 0 to skip.\nstylusPackage: container://OCI_REGISTRY/stylus-linux-amd64:v0.0.0-STYLUS_HASH\noptions:\n system.uri: \"OCI_REGISTRY/ubuntu:k3s-K3S_VERSION-vPE_VERSION-STYLUS_HASH\"", + "manifests": [] + }, + { + "tag": "_____place_holder_____", + "name": "edge-k3s", + "type": "spectro", + "values": "cluster:\n config: |\n flannel-backend: host-gw\n disable-network-policy: true\n disable:\n - traefik\n - local-storage\n - servicelb\n - metrics-server\n\n # configure the pod cidr range\n cluster-cidr: \"192.170.0.0/16\"\n\n # configure service cidr range\n service-cidr: \"192.169.0.0/16\"\n\n # etcd snapshot frequency and number of snapshot retained\n etcd-snapshot-schedule-cron: 0 */1 * * *\n etcd-snapshot-retention: 12\n\n # kubeconfig must be in run for the stylus operator to manage the cluster\n write-kubeconfig: /run/kubeconfig\n write-kubeconfig-mode: 600\n\n # additional component settings to harden installation\n kube-apiserver-arg:\n - anonymous-auth=true\n - profiling=false\n - disable-admission-plugins=AlwaysAdmit\n - default-not-ready-toleration-seconds=20\n - default-unreachable-toleration-seconds=20\n - enable-admission-plugins=AlwaysPullImages,NamespaceLifecycle,ServiceAccount,NodeRestriction,DefaultTolerationSeconds\n - audit-log-path=/var/log/apiserver/audit.log\n - audit-policy-file=/etc/kubernetes/audit-policy.yaml\n - audit-log-maxage=30\n - audit-log-maxbackup=10\n - audit-log-maxsize=100\n - authorization-mode=RBAC,Node\n - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256\n kube-controller-manager-arg:\n - profiling=false\n - terminated-pod-gc-threshold=25\n - use-service-account-credentials=true\n - feature-gates=RotateKubeletServerCertificate=true\n - 
node-monitor-period=5s\n - node-monitor-grace-period=20s\n kube-scheduler-arg:\n - profiling=false\n kubelet-arg:\n - read-only-port=0\n - event-qps=0\n - feature-gates=RotateKubeletServerCertificate=true\n - protect-kernel-defaults=true\n - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256\n - rotate-server-certificates=true\nstages:\n initramfs:\n - sysctl:\n vm.overcommit_memory: 1\n kernel.panic: 10\n kernel.panic_on_oops: 1\n kernel.printk: \"0 4 0 7\"\n - directories:\n - path: \"/var/log/apiserver\"\n permissions: 0644\n files:\n - path: /etc/hosts\n permission: \"0644\"\n content: |\n 127.0.0.1 localhost\n - path: \"/etc/kubernetes/audit-policy.yaml\"\n owner_string: \"root\"\n permission: 0600\n content: |\n apiVersion: audit.k8s.io/v1\n kind: Policy\n rules:\n - level: None\n users: [\"system:kube-proxy\"]\n verbs: [\"watch\"]\n resources:\n - group: \"\" # core\n resources: [\"endpoints\", \"services\", \"services/status\"]\n - level: None\n users: [\"system:unsecured\"]\n namespaces: [\"kube-system\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"configmaps\"]\n - level: None\n users: [\"kubelet\"] # legacy kubelet identity\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes\", \"nodes/status\"]\n - level: None\n userGroups: [\"system:nodes\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes\", \"nodes/status\"]\n - level: None\n users:\n - system:kube-controller-manager\n - system:kube-scheduler\n - system:serviceaccount:kube-system:endpoint-controller\n verbs: [\"get\", \"update\"]\n namespaces: [\"kube-system\"]\n resources:\n - group: \"\" # core\n resources: [\"endpoints\"]\n - level: None\n users: 
[\"system:apiserver\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"namespaces\", \"namespaces/status\", \"namespaces/finalize\"]\n - level: None\n users: [\"cluster-autoscaler\"]\n verbs: [\"get\", \"update\"]\n namespaces: [\"kube-system\"]\n resources:\n - group: \"\" # core\n resources: [\"configmaps\", \"endpoints\"]\n # Don't log HPA fetching metrics.\n - level: None\n users:\n - system:kube-controller-manager\n verbs: [\"get\", \"list\"]\n resources:\n - group: \"metrics.k8s.io\"\n # Don't log these read-only URLs.\n - level: None\n nonResourceURLs:\n - /healthz*\n - /version\n - /swagger*\n # Don't log events requests.\n - level: None\n resources:\n - group: \"\" # core\n resources: [\"events\"]\n # node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes\n - level: Request\n users: [\"kubelet\", \"system:node-problem-detector\", \"system:serviceaccount:kube-system:node-problem-detector\"]\n verbs: [\"update\",\"patch\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes/status\", \"pods/status\"]\n omitStages:\n - \"RequestReceived\"\n - level: Request\n userGroups: [\"system:nodes\"]\n verbs: [\"update\",\"patch\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes/status\", \"pods/status\"]\n omitStages:\n - \"RequestReceived\"\n # deletecollection calls can be large, don't log responses for expected namespace deletions\n - level: Request\n users: [\"system:serviceaccount:kube-system:namespace-controller\"]\n verbs: [\"deletecollection\"]\n omitStages:\n - \"RequestReceived\"\n # Secrets, ConfigMaps, and TokenReviews can contain sensitive & binary data,\n # so only log at the Metadata level.\n - level: Metadata\n resources:\n - group: \"\" # core\n resources: [\"secrets\", \"configmaps\"]\n - group: authentication.k8s.io\n resources: [\"tokenreviews\"]\n omitStages:\n - \"RequestReceived\"\n # Get repsonses can be large; skip them.\n - level: 
Request\n verbs: [\"get\", \"list\", \"watch\"]\n resources:\n - group: \"\" # core\n - group: \"admissionregistration.k8s.io\"\n - group: \"apiextensions.k8s.io\"\n - group: \"apiregistration.k8s.io\"\n - group: \"apps\"\n - group: \"authentication.k8s.io\"\n - group: \"authorization.k8s.io\"\n - group: \"autoscaling\"\n - group: \"batch\"\n - group: \"certificates.k8s.io\"\n - group: \"extensions\"\n - group: \"metrics.k8s.io\"\n - group: \"networking.k8s.io\"\n - group: \"policy\"\n - group: \"rbac.authorization.k8s.io\"\n - group: \"settings.k8s.io\"\n - group: \"storage.k8s.io\"\n omitStages:\n - \"RequestReceived\"\n # Default level for known APIs\n - level: RequestResponse\n resources:\n - group: \"\" # core\n - group: \"admissionregistration.k8s.io\"\n - group: \"apiextensions.k8s.io\"\n - group: \"apiregistration.k8s.io\"\n - group: \"apps\"\n - group: \"authentication.k8s.io\"\n - group: \"authorization.k8s.io\"\n - group: \"autoscaling\"\n - group: \"batch\"\n - group: \"certificates.k8s.io\"\n - group: \"extensions\"\n - group: \"metrics.k8s.io\"\n - group: \"networking.k8s.io\"\n - group: \"policy\"\n - group: \"rbac.authorization.k8s.io\"\n - group: \"settings.k8s.io\"\n - group: \"storage.k8s.io\"\n omitStages:\n - \"RequestReceived\"\n # Default level for all other requests.\n - level: Metadata\n omitStages:\n - \"RequestReceived\"\npack:\n palette:\n config:\n oidc:\n identityProvider: noauth", + "manifests": [] + }, + { + "tag": "0.1.0", + "name": "cni-custom", + "type": "spectro", + "values": "manifests:\n byo-cni:\n contents: |\n apiVersion: v1\n kind: ConfigMap\n metadata:\n name: custom-cni\n data:\n # property-like keys; each key maps to a simple value\n custom-cni: \"byo-cni\"", + "manifests": [] + } + ] + } + ], + "policies": { + "scanPolicy": {} + }, + "clusterConfig": { + "machineManagementConfig": { + "osPatchConfig": { + "schedule": "", + "patchOnBoot": false, + "rebootIfRequired": false + } + }, + "updateWorkerPoolsInParallel": false, 
+ "resources": { + "namespaces": [], + "rbacs": [] + }, + "location": null + } + } +} diff --git a/test/templates/two-node-update.json.tmpl b/test/templates/two-node-update.json.tmpl new file mode 100644 index 0000000..9b2d226 --- /dev/null +++ b/test/templates/two-node-update.json.tmpl @@ -0,0 +1,44 @@ +{ + "cloudConfig": { + "edgeHosts": [ + { + "IsCandidateCaption": false, + "hostAddress": "_____place_holder_____", + "hostName": "_____place_holder_____", + "hostUid": "_____place_holder_____", + "nic": { + "nicName": "ens160" + }, + "twoNodeCandidatePriority": "primary" + }, + { + "IsCandidateCaption": false, + "hostAddress": "_____place_holder_____", + "hostName": "_____place_holder_____", + "hostUid": "_____place_holder_____", + "nic": { + "nicName": "ens160" + }, + "twoNodeCandidatePriority": "secondary" + } + ] + }, + "poolConfig": { + "name": "master-pool", + "labels": [ + "master" + ], + "isControlPlane": true, + "useControlPlaneAsWorker": true, + "taints": [], + "additionalLabels": {}, + "nodeRepaveInterval": 0, + "updateStrategy": { + "type": "RollingUpdateScaleIn" + }, + "machinePoolProperties": { + "archType": "amd64" + }, + "size": 2 + } +} \ No newline at end of file diff --git a/test/test-two-node.sh b/test/test-two-node.sh new file mode 100755 index 0000000..c84607e --- /dev/null +++ b/test/test-two-node.sh @@ -0,0 +1,575 @@ +#!/bin/bash + +set -e + +# Usage +# ----- +# +# 1. Install prerequisites: +# - docker (https://docs.docker.com/engine/install/) +# - earthly (https://earthly.dev/get-earthly) +# - git (https://github.com/git-guides/install-git) +# - govc (https://github.com/vmware/govmomi/blob/main/govc/README.md#installation) +# WARNING: govc must be v0.32.0 or greater! +# - jq (https://jqlang.github.io/jq/download/) +# - mkisofs (https://command-not-found.com/mkisofs) +# +# 2. Clone CanvOS and checkout this branch. +# +# 3. Configure your Earthly argument file by running: cp .arg.template .arg +# No modifications to the template are required. 
+# +# 4. Create a .netrc file in the stylus repo root with GitHub +# credentials capable of cloning Spectro Cloud internal repos. +# +# 5. Copy the test/env.example file to test/.env and edit test/.env +# as required. +# +# 6. Source and execute this script: +# +# source ./test/test-two-node.sh +# ./test/test-two-node.sh + +# Do not edit anything below + +declare -a edge_host_names +declare -a vm_array + +function init_globals() { + if [ -n "$SUFFIX_OVERRIDE" ]; then + export HOST_SUFFIX=$HOST_SUFFIX-$SUFFIX_OVERRIDE + export CLUSTER_NAME=$CLUSTER_NAME-$SUFFIX_OVERRIDE + fi + + vm_array+=("tn1-$HOST_SUFFIX" "tn2-$HOST_SUFFIX") + export HOST_1="${vm_array[0]}" + export HOST_2="${vm_array[1]}" + echo "VM names: $HOST_1, $HOST_2" + + if [ -n "$REPLACEMENT_HOST" ]; then + export HOST_3="tn3-$HOST_SUFFIX" + vm_array+=($HOST_3) + echo "Added replacement VM: $HOST_3" + fi +} + +function create_canvos_args() { + cat < .arg +CUSTOM_TAG=twonode +IMAGE_REGISTRY=$OCI_REGISTRY +OS_DISTRIBUTION=ubuntu +IMAGE_REPO=ubuntu +OS_VERSION=22 +K8S_DISTRIBUTION=k3s +ISO_NAME=palette-edge-installer +ARCH=amd64 +HTTPS_PROXY= +HTTP_PROXY= +PROXY_CERT_PATH= +UPDATE_KERNEL=false +EOF +} + +function create_userdata() { + cat < build/user-data +#cloud-config +stylus: + debug: true + users: + - name: kairos + passwd: kairos + site: + edgeHostToken: "$EDGE_REGISTRATION_TOKEN" + paletteEndpoint: "$DOMAIN" +EOF + if [ -n "$PROXY" ]; then + cat <> build/user-data + network: + httpProxy: http://10.10.180.0:3128 + httpsProxy: http://10.10.180.0:3128 + noProxy: 10.10.128.10,.spectrocloud.dev,10.0.0.0/8 +EOF + fi + cat <> build/user-data +install: + poweroff: true +EOF + if [ -n "$WIFI_NETWORK" ]; then + cat <<'EOF' >> build/user-data + bind_mounts: + - /var/lib/wpa +stages: + initramfs: + - users: + kairos: + groups: + - sudo + passwd: kairos + network.before: + - name: "Connect to Wi-Fi" + commands: + - | + # Find the first wireless network interface + wireless_interface="" + for interface in $(ip 
link | grep -oP '^\d+: \K[^:]+(?=:)') + do + if [ -d "/sys/class/net/$interface/wireless" ]; then + wireless_interface=$interface + break + fi + done + # Check if a wireless interface was found and connect it to WiFi + if [ -n "$wireless_interface" ]; then + wpa_passphrase | tee /var/lib/wpa/wpa_supplicant.conf + wpa_supplicant -B -c /var/lib/wpa/wpa_supplicant.conf -i $wireless_interface + dhclient $wireless_interface + else + echo "No wireless network interface found." + fi +EOF + sed -i "s||$WIFI_NETWORK|g" build/user-data + sed -i "s||$WIFI_PASSWORD|g" build/user-data + fi + echo "created build/user-data" +} + +function create_iso() { + touch meta-data + mkisofs -output build/user-data.iso -volid cidata -joliet -rock $1 meta-data + rm -f meta-data +} + +function create_userdata_iso() { + echo Creating user-data ISO... + create_userdata + create_iso build/user-data +} + +function upload_userdata_iso() { + echo Uploading user-data ISO... + govc datastore.upload --ds=$GOVC_DATASTORE --dc=$GOVC_DATACENTER "build/user-data.iso" "${ISO_FOLDER}/user-data.iso" +} + +function upload_stylus_iso() { + iso=palette-edge-installer-stylus-${STYLUS_HASH}-k3s-${PROVIDER_K3S_HASH}.iso + echo Uploading installer ISO $iso... + govc datastore.upload --ds=$GOVC_DATASTORE --dc=$GOVC_DATACENTER build/$iso $STYLUS_ISO +} + +function create_vms() { + echo Creating VMs... + for vm in "${vm_array[@]}"; do + govc vm.create -m 8192 -c 4 -disk 100GB -net.adapter vmxnet3 -iso=$STYLUS_ISO -on=false -pool=$GOVC_RESOURCE_POOL $vm + dev=$(govc device.cdrom.add -vm $vm) + govc device.cdrom.insert -vm=$vm -device=$dev "${ISO_FOLDER}/user-data.iso" + govc vm.power -on $vm + done +} + +function destroy_vms() { + for vm in "${vm_array[@]}"; do + govc vm.destroy $vm + done +} + +function wait_for_vms_to_power_off() { + echo Waiting for both VMs to be flashed and power off... 
+ while true; do + powerState1=$(govc vm.info -json=true "${vm_array[0]}" | jq -r .[][0].runtime.powerState) + powerState2=$(govc vm.info -json=true "${vm_array[1]}" | jq -r .[][0].runtime.powerState) + echo "Power state for ${vm_array[0]}: $powerState1" + echo "Power state for ${vm_array[1]}: $powerState2" + if [ "$powerState1" = "poweredOff" ] && [ "$powerState2" = "poweredOff" ]; then + echo VMs powered off! + break + fi + echo "VMs not powered off, sleeping for 5s..." + sleep 5 + done +} + +function reboot_vms() { + echo "Ejecting installer ISO & rebooting VMs..." + for vm in "${vm_array[@]}"; do + govc device.ls -vm=$vm + govc vm.power -off -force $vm + govc device.cdrom.eject -vm=$vm -device=cdrom-3000 + govc device.cdrom.eject -vm=$vm -device=cdrom-3001 + govc vm.power -on $vm + done +} + +function get_ready_edge_hosts() { + curl -s -X POST https://$DOMAIN/v1/dashboard/edgehosts/search \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d \ + ' + { + "filter": { + "conjuction": "and", + "filterGroups": [ + { + "conjunction": "and", + "filters": [ + { + "property": "state", + "type": "string", + "condition": { + "string": { + "operator": "eq", + "negation": false, + "match": { + "conjunction": "or", + "values": [ + "ready", + "unpaired" + ] + }, + "ignoreCase": false + } + } + } + ] + } + ] + }, + "sort": [] + } + ' +} + +function wait_until_edge_hosts_ready() { + echo Waiting for both Edge Hosts to register and become healthy... + while true; do + set +e + ready=$(get_ready_edge_hosts | jq -e 'select(.items != []).items | map(. | select(.status.health.state == "healthy")) | length') + set -e + if [ -z ${ready} ]; then + ready=0 + fi + if [ $ready -ge 2 ]; then + echo Both Edge Hosts are healthy! + break + fi + echo "Only $ready/2 Edge Hosts are healthy, sleeping for 5s..." 
+ sleep 5 + done +} + +function ready_edge_host_names() { + readarray -t edge_host_names < <(get_ready_edge_hosts | jq -r 'select(.items != []).items | map(.metadata.name) | flatten[]') + export EDGE_HOST_1=${edge_host_names[0]} + export EDGE_HOST_2=${edge_host_names[1]} + echo "Ready Edge Host names: ${edge_host_names[@]}" +} + +function destroy_edge_hosts() { + readarray -t edgeHosts < <(curl -s -X POST https://$DOMAIN/v1/dashboard/edgehosts/search \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d \ + ' + { + "filter": { + "conjuction": "and", + "filterGroups": [ + { + "conjunction": "and", + "filters": [ + { + "property": "state", + "type": "string", + "condition": { + "string": { + "operator": "eq", + "negation": false, + "match": { + "conjunction": "or", + "values": [ + "ready", + "unpaired" + ] + }, + "ignoreCase": false + } + } + } + ] + } + ] + }, + "sort": [] + } + ' | jq -r '.items[].metadata.uid') + for host in "${edgeHosts[@]}"; do + curl -s -X DELETE https://$DOMAIN/v1/edgehosts/$host \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" + echo "Deleted Edge Host $host" + done +} + +function prepare_cluster_profile() { + if [ -z "${STYLUS_HASH}" ]; then + echo STYLUS_HASH is unset. Please execute build_all and retry. 
+ return 1 + fi + jq ' + .metadata.name = env.CLUSTER_NAME | + .spec.template.packs[0].registry.metadata.uid = env.PUBLIC_PACK_REPO_UID | + .spec.template.packs[1].version = env.K3S_VERSION | + .spec.template.packs[1].tag = env.K3S_VERSION | + .spec.template.packs[1].registry.metadata.uid = env.PUBLIC_PACK_REPO_UID | + .spec.template.packs[2].registry.metadata.uid = env.PUBLIC_PACK_REPO_UID | + .spec.template.packs[0].values |= gsub("OCI_REGISTRY"; env.OCI_REGISTRY) | + .spec.template.packs[0].values |= gsub("PE_VERSION"; env.PE_VERSION) | + .spec.template.packs[0].values |= gsub("K3S_VERSION"; env.K3S_VERSION) | + .spec.template.packs[0].values |= gsub("STYLUS_HASH"; env.STYLUS_HASH) + ' test/templates/two-node-cluster-profile.json.tmpl > two-node-cluster-profile.json +} + +function create_cluster_profile() { + export CLUSTER_PROFILE_UID=$(curl -s -X POST https://$DOMAIN/v1/clusterprofiles/import?publish=true \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d @two-node-cluster-profile.json | jq -r .uid) + rm -f two-node-cluster-profile.json + if [ "$CLUSTER_PROFILE_UID" = "null" ]; then + echo Cluster Profile creation failed as it already exists. Please delete it and retry. + return 1 + fi + echo "Cluster Profile $CLUSTER_PROFILE_UID created" +} + +function destroy_cluster_profile() { + clusterProfileUid=$1 + curl -s -X DELETE https://$DOMAIN/v1/clusterprofiles/$clusterProfileUid \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" + echo "Cluster Profile $clusterProfileUid deleted" +} + +function prepare_master_master_cluster() { + if [ -z "${STYLUS_HASH}" ]; then + echo STYLUS_HASH is unset. Please execute build_all and retry. + return 1 + fi + if nslookup $CLUSTER_VIP >/dev/null; then + echo CLUSTER_VIP: $CLUSTER_VIP is allocated. Please retry with an unallocated VIP. 
+ return 1 + fi + jq ' + .metadata.name = env.CLUSTER_NAME | + .spec.cloudConfig.controlPlaneEndpoint.host = env.CLUSTER_VIP | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[0].hostName = env.EDGE_HOST_1 | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[0].hostUid = env.EDGE_HOST_1 | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[0].nicName = env.NIC_NAME | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[1].hostName = env.EDGE_HOST_2 | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[1].hostUid = env.EDGE_HOST_2 | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[1].nicName = env.NIC_NAME | + .spec.profiles[0].uid = env.CLUSTER_PROFILE_UID | + .spec.profiles[0].packValues[0].values |= gsub("OCI_REGISTRY"; env.OCI_REGISTRY) | + .spec.profiles[0].packValues[0].values |= gsub("PE_VERSION"; env.PE_VERSION) | + .spec.profiles[0].packValues[0].values |= gsub("K3S_VERSION"; env.K3S_VERSION) | + .spec.profiles[0].packValues[0].values |= gsub("STYLUS_HASH"; env.STYLUS_HASH) | + .spec.profiles[0].packValues[1].tag = env.K3S_VERSION + ' test/templates/two-node-master-master.json.tmpl > two-node-create.json +} + +function create_cluster() { + uid=$(curl -s -X POST https://$DOMAIN/v1/spectroclusters/edge-native?ProjectUid=$PROJECT_UID \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d @two-node-create.json | jq -r .uid) + if [ "$uid" = "null" ]; then + echo "Cluster creation failed. Please check two-node-create.json and retry creation manually to see Hubble's response." 
+ return 1 + else + rm -f two-node-create.json + echo "Cluster $uid created" + fi +} + +function destroy_cluster() { + clusterUid=$1 + curl -s -X PATCH https://$DOMAIN/v1/spectroclusters/$clusterUid/status/conditions \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d \ + ' + [ + { + "message": "cleaned up", + "reason": "CloudInfrastructureCleanedUp", + "status": "True", + "type": "CloudInfrastructureCleanedUp" + } + ] + ' + echo "Cluster $clusterUid deleted" +} + +function prepare_cluster_update() { + export leaderIp=$1 + export replacementHostIp=$2 + jq ' + .cloudConfig.edgeHosts[0].hostAddress = env.leaderIp | + .cloudConfig.edgeHosts[0].hostName = env.HOST_1 | + .cloudConfig.edgeHosts[0].hostUid = env.HOST_1 | + .cloudConfig.edgeHosts[1].hostAddress = env.replacementHostIp | + .cloudConfig.edgeHosts[1].hostName = env.HOST_3 | + .cloudConfig.edgeHosts[1].hostUid = env.HOST_3 + ' test/templates/two-node-update.json.tmpl > two-node-update.json +} + +function update_cluster() { + cloudConfigUid=$1 + curl -X PUT https://$DOMAIN/v1/cloudconfigs/edge-native/$cloudConfigUid/machinePools/master-pool \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d @two-node-update.json + rm -f two-node-update.json + echo "Cloud config $cloudConfigUid updated" +} + +function build_provider_k3s() { + echo "Building provider-k3s image..." + earthly +build-provider-package \ + --platform=linux/amd64 \ + --IMAGE_REPOSITORY=${OCI_REGISTRY} \ + --VERSION=${PROVIDER_K3S_HASH} + docker push ${OCI_REGISTRY}/provider-k3s:${PROVIDER_K3S_HASH} +} + +function build_stylus_package_and_framework() { + echo "Building stylus image and stylus framework image..." 
+ earthly --allow-privileged +package \ + --platform=linux/amd64 \ + --IMAGE_REPOSITORY=${OCI_REGISTRY} \ + --BASE_IMAGE=quay.io/kairos/core-opensuse-leap:v2.3.2 \ + --VERSION=v0.0.0-${STYLUS_HASH} + docker push ${OCI_REGISTRY}/stylus-linux-amd64:v0.0.0-${STYLUS_HASH} + docker push ${OCI_REGISTRY}/stylus-framework-linux-amd64:v0.0.0-${STYLUS_HASH} +} + +function build_canvos() { + echo "Building provider image & installer ISO..." + earthly +build-all-images \ + --ARCH=amd64 \ + --PROVIDER_BASE=${OCI_REGISTRY}/provider-k3s:${PROVIDER_K3S_HASH} \ + --STYLUS_BASE=${OCI_REGISTRY}/stylus-framework-linux-amd64:v0.0.0-${STYLUS_HASH} \ + --ISO_NAME=palette-edge-installer-stylus-${STYLUS_HASH}-k3s-${PROVIDER_K3S_HASH} \ + --IMAGE_REGISTRY=${OCI_REGISTRY} \ + --TWO_NODE=true \ + --TWO_NODE_BACKEND=${TWO_NODE_BACKEND} \ + --CUSTOM_TAG=${STYLUS_HASH} \ + --PE_VERSION=v${PE_VERSION} + docker push ${OCI_REGISTRY}/ubuntu:k3s-${K3S_VERSION}-v${PE_VERSION}-${STYLUS_HASH} +} + +function build_all() { + + # optionally build/rebuild provider-k3s + test -d ../provider-k3s || ( cd .. && git clone https://github.com/kairos-io/provider-k3s -b ${PROVIDER_K3S_BRANCH}) + cd ../provider-k3s + export PROVIDER_K3S_HASH=$(git describe --always) + ( + docker image ls --format "{{.Repository}}:{{.Tag}}" | \ + grep -q ${OCI_REGISTRY}/provider-k3s:${PROVIDER_K3S_HASH} + ) || ( build_provider_k3s ) + + # optionally build/rebuild stylus images + test -d ../stylus || ( cd .. 
&& git clone https://github.com/spectrocloud/stylus -b ${STYLUS_BRANCH} ) + cd ../stylus + export STYLUS_HASH=$(git describe --always) + ( + docker image ls --format "{{.Repository}}:{{.Tag}}" | \ + grep -q $OCI_REGISTRY/stylus-linux-amd64:v0.0.0-${STYLUS_HASH} + ) || ( build_stylus_package_and_framework ) + + # optionally build/rebuild provider image & installer ISO + cd ../CanvOS + ( + test -f build/palette-edge-installer-stylus-${STYLUS_HASH}-k3s-${PROVIDER_K3S_HASH}.iso && \ + docker image ls --format "{{.Repository}}:{{.Tag}}" | \ + grep -q ${OCI_REGISTRY}/ubuntu:k3s-${K3S_VERSION}-v${PE_VERSION}-${STYLUS_HASH} + ) || ( build_canvos ) +} + +function clean_all() { + docker images | grep $OCI_REGISTRY | awk '{print $3;}' | xargs docker rmi --force + docker images | grep palette-installer | awk '{print $3;}' | xargs docker rmi --force + earthly prune --reset + docker system prune --all --volumes --force +} + +function main() { + init_globals + + # build all required edge artifacts + build_all + + # upload installer ISO to vSphere + upload_stylus_iso + + # create & upload user-data ISOs, configured to enable two node mode + create_userdata_iso + upload_userdata_iso + + # create VMs in vSphere, wait for the installation phase to complete, + # then power them off, remove the installer ISO, and reboot them + create_vms + wait_for_vms_to_power_off + reboot_vms + + # wait for the VMs to register with Palette and appear as Edge Hosts + wait_until_edge_hosts_ready + ready_edge_host_names + + # optionally create a two node Cluster Profile using the latest artifact + # versions - can be skipped by specifying the UID + if [ -z "${CLUSTER_PROFILE_UID}" ]; then + prepare_cluster_profile + create_cluster_profile + fi + + # create a new Edge Native cluster in Palette using the Edge Hosts + # provisioned above, plus the two node Cluster Profile + prepare_master_master_cluster + create_cluster +} + +# This line and the if condition below allow sourcing the script without 
executing +# the main function +(return 0 2>/dev/null) && sourced=1 || sourced=0 + +if [[ $sourced == 1 ]]; then + script=${BASH_SOURCE[0]} + if [ -z "$script" ]; then + script=$0 + fi + set +e + echo "You can now use any of these functions:" + echo "" + grep ^function $script | grep -v main | awk '{gsub(/function /,""); gsub(/\(\) \{/,""); print;}' + echo +else + envfile=$(dirname "${0}")/.env + if [ -f "${envfile}" ]; then + source "${envfile}" + echo "Sourced $envfile" + else + echo "Please create a .env file in the test directory and populate it with the required variables." + exit 1 + fi + main +fi