From 4088352fd4df84b2a8be7f70fbb2bdf6f220cc4d Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Fri, 12 Jan 2024 16:00:43 +0100 Subject: [PATCH] Update docs and CI to support rabbitmq-external chart (#655) * fix the system containers seeding group * add more storage to vms in test env * ci.sh: Add rabbitmq-external chart to artifact * Use `rabbitmq` Helm chart (instead of `rabbitmq-external`) * Document RabbitMQ installation Co-authored-by: Sven Tennie * offline-cluster.sh: Don't use rabbitmq and helm_external playbooks * add helm_external playbook installation step in offline-helm.sh * use k8s based rabbitmq by default * Point restund_allowed_private_network_cidrs to 172.16.0.1/24 * offline-cluster.sh: Run restund Ansible playbook last * [temp] use mandarian release to build the artifact Current Mandarin * pickup valid changes from #656 * [temp] debug the failing CI * use the main wire-server chart repo * pass the ssh-agent path and remove verbosity * use fixed inventory path for CI * pass correct ssh_auth_sock * Revert "use fixed inventory path for CI" This reverts commit ec0bbeded40e4363347b70e931ad19382807357a. 
* fix indentation and if-else statements * update helm_external playbook to add tag to rabbitmq specific croles * fix documentation and CI with rabbitmq chart * add note for adding etcd groups * add more details to rabbitmq configuration steps --------- Co-authored-by: Sven Tennie --- ansible/helm_external.yml | 1 + ansible/inventory/offline/99-static | 6 +- ansible/roles-external/sft | 2 +- ansible/seed-offline-containerd.yml | 3 +- bin/offline-cluster.sh | 45 +++++++---- bin/offline-deploy.sh | 1 - bin/offline-helm.sh | 1 + bin/offline-vm-setup.sh | 2 +- offline/ci.sh | 1 + offline/docs_ubuntu_22.04.md | 81 +++++++------------- offline/federation_preparation.md | 82 +++++++++++++++++++++ values/wire-server/prod-values.example.yaml | 4 +- 12 files changed, 153 insertions(+), 76 deletions(-) create mode 100644 offline/federation_preparation.md diff --git a/ansible/helm_external.yml b/ansible/helm_external.yml index 1ba1dc2e9..aed173a47 100644 --- a/ansible/helm_external.yml +++ b/ansible/helm_external.yml @@ -45,3 +45,4 @@ external_dir_name: rabbitmq-external server_type: rmq-cluster network_interface: "{{ rabbitmq_network_interface }}" + tags: rabbitmq-external diff --git a/ansible/inventory/offline/99-static b/ansible/inventory/offline/99-static index 1e8593548..e0c1369ad 100644 --- a/ansible/inventory/offline/99-static +++ b/ansible/inventory/offline/99-static @@ -86,6 +86,7 @@ #domain = "example.com" #deeplink_title = "example.com environment" +# Rabbitmq specific variables [rmq-cluster:vars] # rabbitmq_network_interface = enp1s0 @@ -99,8 +100,8 @@ # restund_allowed_private_network_cidrs = a.b.c.d/24 # If you install restund together with other services on the same machine # you need to restund_allowed_private_network_cidrs to allow these services -# to communicate on the private network. E.g. If your private network is 172.16.0.1/24 -# restund_allowed_private_network_cidrs = 172.16.0/24 +# to communicate on the private network. E.g. 
If your private network is 172.16.0.0/24 +# restund_allowed_private_network_cidrs = '["172.16.0.0/24"]' # Explicitely specify the restund user id to be "root" to override the default of "997" restund_uid = root @@ -180,6 +181,7 @@ elasticsearch # minio2 # minio3 +# Add all rabbitmq nodes here [rmq-cluster] # rabbitmq1 # rabbitmq2 diff --git a/ansible/roles-external/sft b/ansible/roles-external/sft index 0839e76da..a11e1d918 160000 --- a/ansible/roles-external/sft +++ b/ansible/roles-external/sft @@ -1 +1 @@ -Subproject commit 0839e76da61d1cb6e795bf412ea9ef60825e1370 +Subproject commit a11e1d91826ea3d8ffee2e1ba23eb0dfe7c333b5 diff --git a/ansible/seed-offline-containerd.yml b/ansible/seed-offline-containerd.yml index d0f0a2d3a..85bc50b6f 100644 --- a/ansible/seed-offline-containerd.yml +++ b/ansible/seed-offline-containerd.yml @@ -1,5 +1,6 @@ - name: Seed system containers - hosts: k8s-cluster:etcd + # Add etcd group here if you are deploying separate worker and master clusters + hosts: k8s-cluster tags: system-containers tasks: - name: load containers diff --git a/bin/offline-cluster.sh b/bin/offline-cluster.sh index c25d240d7..c3976bf87 100755 --- a/bin/offline-cluster.sh +++ b/bin/offline-cluster.sh @@ -2,43 +2,60 @@ set -eou pipefail -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -ANSIBLE_DIR="$( cd "$SCRIPT_DIR/../ansible" && pwd )" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ANSIBLE_DIR="$(cd "$SCRIPT_DIR/../ansible" && pwd)" set -x ls $ANSIBLE_DIR/inventory/offline +if [ -f "$ANSIBLE_DIR/inventory/offline/hosts.ini" ]; then + INVENTORY_FILE="$ANSIBLE_DIR/inventory/offline/hosts.ini" +elif [ -f "$ANSIBLE_DIR/inventory/offline/inventory.yml" ]; then + INVENTORY_FILE="$ANSIBLE_DIR/inventory/offline/inventory.yml" +else + echo "No inventory file in ansible/inventory/offline/. 
Please supply an $ANSIBLE_DIR/inventory/offline/inventory.yml or $ANSIBLE_DIR/inventory/offline/hosts.ini" + exit -1 +fi + +if [ -f "$ANSIBLE_DIR/inventory/offline/hosts.ini" ] && [ -f "$ANSIBLE_DIR/inventory/offline/inventory.yml" ]; then + echo "Both hosts.ini and inventory.yml provided in ansible/inventory/offline! Pick only one." + exit -1 +fi + +echo "using ansible inventory: $INVENTORY_FILE" + # Populate the assethost, and prepare to install images from it. # # Copy over binaries and debs, serves assets from the asset host, and configure # other hosts to fetch debs from it. # # If this step fails partway, and you know that parts of it completed, the `--skip-tags debs,binaries,containers,containers-helm,containers-other` tags may come in handy. -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/setup-offline-sources.yml +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/setup-offline-sources.yml # Run kubespray until docker is installed and runs. This allows us to preseed the docker containers that # are part of the offline bundle -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/kubernetes.yml --tags bastion,bootstrap-os,preinstall,container-engine +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/kubernetes.yml --tags bastion,bootstrap-os,preinstall,container-engine # Install docker on the restund nodes -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/restund.yml --tags docker +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/restund.yml --tags docker # With ctr being installed on all nodes that need it, seed all container images: -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/seed-offline-containerd.yml +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/seed-offline-containerd.yml # Install NTP -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/sync_time.yml -v +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/sync_time.yml -v # Run the rest of kubespray. 
This should bootstrap a kubernetes cluster successfully: -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/kubernetes.yml --skip-tags bootstrap-os,preinstall,container-engine +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/kubernetes.yml --skip-tags bootstrap-os,preinstall,container-engine ./bin/fix_default_router.sh # Deploy all other services which don't run in kubernetes. -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/cassandra.yml -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/elasticsearch.yml -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/restund.yml -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/minio.yml -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/rabbitmq.yml -ansible-playbook -i $ANSIBLE_DIR/inventory/offline $ANSIBLE_DIR/helm_external.yml +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/cassandra.yml +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/elasticsearch.yml +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/minio.yml +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/restund.yml + +# create helm values that tell our helm charts what the IP addresses of cassandra, elasticsearch and minio are: +ansible-playbook -i $INVENTORY_FILE $ANSIBLE_DIR/helm_external.yml --skip-tags=rabbitmq-external diff --git a/bin/offline-deploy.sh b/bin/offline-deploy.sh index cfc475c76..bc38950c7 100755 --- a/bin/offline-deploy.sh +++ b/bin/offline-deploy.sh @@ -18,6 +18,5 @@ WSD_CONTAINER=$(sudo docker load -i $SCRIPT_DIR/../containers-adminhost/containe ./bin/offline-secrets.sh - sudo docker run --network=host -v $SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent -v $PWD:/wire-server-deploy $WSD_CONTAINER ./bin/offline-cluster.sh sudo docker run --network=host -v $PWD:/wire-server-deploy $WSD_CONTAINER ./bin/offline-helm.sh diff --git a/bin/offline-helm.sh b/bin/offline-helm.sh index 6512783c8..b344c68f6 100755 --- a/bin/offline-helm.sh +++ 
b/bin/offline-helm.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash set -euo pipefail +set -x helm upgrade --install --wait cassandra-external ./charts/cassandra-external --values ./values/cassandra-external/values.yaml helm upgrade --install --wait elasticsearch-external ./charts/elasticsearch-external --values ./values/elasticsearch-external/values.yaml diff --git a/bin/offline-vm-setup.sh b/bin/offline-vm-setup.sh index af323a32c..7c683f2b9 100755 --- a/bin/offline-vm-setup.sh +++ b/bin/offline-vm-setup.sh @@ -67,7 +67,7 @@ create_node () { sudo virt-install \ --name "$name" \ --ram 8192 \ - --disk path=/var/kvm/images/"$name".img,size=80 \ + --disk path=/var/kvm/images/"$name".img,size=100 \ --vcpus 6 \ --network bridge=br0 \ --graphics none \ diff --git a/offline/ci.sh b/offline/ci.sh index 85ad63329..97d65ccc5 100755 --- a/offline/ci.sh +++ b/offline/ci.sh @@ -92,6 +92,7 @@ charts=( wire/sftd wire/restund wire/rabbitmq + wire/rabbitmq-external # Has a weird dependency on curl:latest. out of scope # wire-server-metrics # fluent-bit diff --git a/offline/docs_ubuntu_22.04.md b/offline/docs_ubuntu_22.04.md index 37407202e..fa85e4db2 100644 --- a/offline/docs_ubuntu_22.04.md +++ b/offline/docs_ubuntu_22.04.md @@ -280,19 +280,6 @@ the `wire.com/external-ip` annotation to the public IP of the node. In order to automatically generate deeplinks, Edit the minio variables in `[minio:vars]` (`prefix`, `domain` and `deeplink_title`) by replacing `example.com` with your own domain. -### Configuring rabbitmq - -Add the nodes in which you want to run rabbitmq to the `[rmq-cluster]` group. Also, update the `ansible/roles/rabbimq-cluster/defaults/main.yml` file with the correct configurations for your environment. 
- -Important: RabbitMQ nodes address each other using a node name, for e.g rabbitmq@ansnode1 -Please refer to official doc and configure your DNS based on the setup - https://www.rabbitmq.com/clustering.html#cluster-formation-requirements - -For adding entries to local host file(/etc/hosts), run -``` -d ansible-playbook -i ansible/inventory/offline/hosts.ini ansible/roles/rabbitmq-cluster/tasks/configure_dns.yml -``` - - ### Example hosts.ini @@ -327,7 +314,10 @@ deeplink_title = "wire demo environment, example.com" [restund:vars] restund_uid = root -restund_allowed_private_network_cidrs=172.16.0.1/24 +restund_allowed_private_network_cidrs='["172.16.0.0/24"]' + +[rmq-cluster:vars] +rabbitmq_network_interface = enp1s0 [kube-master] kubenode1 @@ -394,23 +384,6 @@ Minio and restund services have shared secrets with the `wire-server` helm chart This should generate two files. `./ansible/inventory/group_vars/all/secrets.yaml` and `values/wire-server/secrets.yaml`. -## Deploying Kubernetes, Restund and stateful services - -In order to deploy all the services run: -``` -d ./bin/offline-cluster.sh -``` -In case any of the steps in this script fail, see the notes in the comments that accompany each step. -Comment out steps that have already completed when re-running the scripts. - -#### Ensuring kubernetes is healthy. - -Ensure the cluster comes up healthy. The container also contains kubectl, so check the node status: - -``` -d kubectl get nodes -owide -``` -They should all report ready. ### WORKAROUND: old debian key All of our debian archives up to version 4.12.0 used a now-outdated debian repository signature. Some modifications are required to be able to install everything properly. 
@@ -460,6 +433,25 @@ Hash: SHA1, RIPEMD160, SHA256, SHA384, SHA512, SHA224 Compression: Uncompressed, ZIP, ZLIB, BZIP2 ``` +## Deploying Kubernetes, Restund and stateful services + +In order to deploy all mentioned services, run: +``` +d ./bin/offline-cluster.sh +``` +In case any of the steps in this script fail, see the notes in the comments that accompany each step. +Comment out steps that have already completed when re-running the scripts. + +#### Ensuring Kubernetes is healthy. + +Ensure the cluster comes up healthy. The container also contains `kubectl`, so check the node status: + +``` +d kubectl get nodes -owide +``` +They should all report ready. + + #### Troubleshooting restund In case the restund firewall fails to start. Fix @@ -507,41 +499,22 @@ ufw allow 25672/tcp; ' ``` -Afterwards, run the following playbook to create helm values that tell our helm charts -what the IP addresses of cassandra, elasticsearch, minio and rabbitmq are. - -``` -d ansible-playbook -i ./ansible/inventory/offline/hosts.ini ansible/helm_external.yml -``` - -#### Installing Rabbitmq +### Preparation for Federation +For enabling Federation, we need to have RabbitMQ in place. Please follow the instructions in [offline/federation_preparation.md](./federation_preparation.md) for setting up RabbitMQ. -To install the rabbitmq, -First copy the value and secret file: -``` -cp ./values/rabbitmq/prod-values.example.yaml ./values/rabbitmq/values.yaml -cp ./values/rabbitmq/prod-secrets.example.yaml ./values/rabbitmq/secrets.yaml -``` - -Now, update the `./values/rabbitmq/values.yaml` and `./values/rabbitmq/secrets.yaml` with correct values as per needed. - -Deploy the rabbitmq helm chart - -``` -d helm upgrade --install rabbitmq ./charts/rabbitmq --values ./values/rabbitmq/values.yaml --values ./values/rabbitmq/secrets.yaml -``` +After that continue to the next steps below. ### Deploying Wire It's now time to deploy the helm charts on top of kubernetes, installing the Wire platform. 
#### Finding the stateful services -First. Make kubernetes aware of where alll the external stateful services are by running: +First, set up interfaces from Kubernetes to external services by running: ``` d helm install cassandra-external ./charts/cassandra-external --values ./values/cassandra-external/values.yaml d helm install elasticsearch-external ./charts/elasticsearch-external --values ./values/elasticsearch-external/values.yaml d helm install minio-external ./charts/minio-external --values ./values/minio-external/values.yaml -d helm install rabbitmq-external ./charts/rabbitmq-external --values ./values/rabbitmq-external/values.yaml ``` #### Deploying stateless dependencies diff --git a/offline/federation_preparation.md b/offline/federation_preparation.md new file mode 100644 index 000000000..ce7dcf13c --- /dev/null +++ b/offline/federation_preparation.md @@ -0,0 +1,82 @@ +## RabbitMQ + +There are two methods to deploy the RabbitMQ cluster: + +### Method 1: Install RabbitMQ inside the Kubernetes cluster using the helm chart + +To install the RabbitMQ service, first copy the value and secret files: +``` +cp ./values/rabbitmq/prod-values.example.yaml ./values/rabbitmq/values.yaml +cp ./values/rabbitmq/prod-secrets.example.yaml ./values/rabbitmq/secrets.yaml +``` +By default this will create a RabbitMQ deployment with ephemeral storage. To use the local persistent storage of Kubernetes nodes, please refer to the related documentation in [offline/local_persistent_storage_k8s.md](./local_persistent_storage_k8s.md). + +Now, update the `./values/rabbitmq/values.yaml` and `./values/rabbitmq/secrets.yaml` with correct values as needed. 
+ +Deploy the `rabbitmq` helm chart: +``` +d helm upgrade --install rabbitmq ./charts/rabbitmq --values ./values/rabbitmq/values.yaml --values ./values/rabbitmq/secrets.yaml +``` + +### Method 2: Install RabbitMQ outside of the Kubernetes cluster with an Ansible playbook + +Add the nodes on which you want to run rabbitmq to the `[rmq-cluster]` group in the `ansible/inventory/offline/hosts.ini` file. Also, update the `ansible/roles/rabbitmq-cluster/defaults/main.yml` file with the correct configurations for your environment. + +If you need RabbitMQ to listen on a different interface than the default gateway, set `rabbitmq_network_interface` under `[rmq-cluster:vars]`. + +You should have the following entries in the `ansible/inventory/offline/hosts.ini` file. For example: +``` +[rmq-cluster:vars] +rabbitmq_network_interface = enp1s0 + +[rmq-cluster] +ansnode1 +ansnode2 +ansnode3 +``` + + +#### Hostname Resolution +RabbitMQ nodes address each other using a node name, a combination of a prefix and domain name, either short or fully-qualified (FQDNs). For example: `rabbitmq@ansnode1`. + +Therefore every cluster member must be able to resolve hostnames of every other cluster member, its own hostname, as well as machines on which command line tools such as rabbitmqctl might be used. + +Nodes will perform hostname resolution early on node boot. In container-based environments it is important that hostname resolution is ready before the container is started. + +Hostname resolution can use any of the standard OS-provided methods: + +For example, DNS records or +local host files (e.g. 
/etc/hosts) +Reference - https://www.rabbitmq.com/clustering.html#cluster-formation-requirements + + +To add entries to the local hosts file (`/etc/hosts`), run: +``` +d ansible-playbook -i ansible/inventory/offline/hosts.ini ansible/roles/rabbitmq-cluster/tasks/configure_dns.yml +``` + +Create the rabbitmq cluster: + +``` +d ansible-playbook -i ansible/inventory/offline/hosts.ini ansible/rabbitmq.yml +``` + +Then run the following playbook to create the values file that tells the helm charts the RabbitMQ IP addresses: + +``` +d ansible-playbook -i ./ansible/inventory/offline/hosts.ini ansible/helm_external.yml --tags=rabbitmq-external +``` + +Make Kubernetes aware of where the external RabbitMQ stateful service is running: +``` +d helm install rabbitmq-external ./charts/rabbitmq-external --values ./values/rabbitmq-external/values.yaml +``` + +Configure wire-server to use the external RabbitMQ service: + +Edit the `./values/wire-server/prod-values.example.yaml` file to update the RabbitMQ host. +Under the `brig` and `galley` sections, you will find the `rabbitmq` config; update the host to `rabbitmq-external`. It should look like this: +``` +rabbitmq: + host: rabbitmq-external +``` diff --git a/values/wire-server/prod-values.example.yaml b/values/wire-server/prod-values.example.yaml index f70276e47..7549b5a27 100644 --- a/values/wire-server/prod-values.example.yaml +++ b/values/wire-server/prod-values.example.yaml @@ -28,7 +28,7 @@ brig: elasticsearch: host: elasticsearch-external rabbitmq: - host: rabbitmq-external + host: rabbitmq # name of the rabbitmq service, either `rabbitmq-external` or `rabbitmq` useSES: false # Set to false if you want to hand out DynamoDB to store prekeys randomPrekeys: true @@ -141,7 +141,7 @@ galley: cassandra: host: cassandra-external rabbitmq: - host: rabbitmq-external + host: rabbitmq # name of the rabbitmq service, either `rabbitmq-external` or `rabbitmq` settings: # prefix URI used when inviting users to a conversation by link conversationCodeURI: 
https://account.example.com/conversation-join/ # change this