From 7f051c0afc9d04cc33e6096f32d38e8d6c6c3a66 Mon Sep 17 00:00:00 2001
From: Lenin Mehedy
Date: Tue, 8 Aug 2023 01:40:33 +1000
Subject: [PATCH] fix: update verification script so that CI pipeline fails if node fails to start (#254)

Signed-off-by: Lenin Mehedy
---
 dev/Makefile          | 39 +++++++++++++++++++++++++++++----------
 dev/scripts/helper.sh |  4 +++-
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/dev/Makefile b/dev/Makefile
index 60eb86797..1ecc1c5ff 100644
--- a/dev/Makefile
+++ b/dev/Makefile
@@ -1,8 +1,19 @@
+# Force the use of bash as the shell for more features
 SHELL=/bin/bash
+
+# Ensure we can catch error to run cleanup when multiple make commands are run in sequence.
+# Here we tell make to run all scripts as one-shell and also set 'pipefail' and 'errexit' flags.
+# https://stackoverflow.com/questions/28597794/how-can-i-clean-up-after-an-error-in-a-makefile
+SHELLOPTS:=$(if $(SHELLOPTS),$(SHELLOPTS):)pipefail:errexit
+.ONESHELL:
+
+# Here we tell make not to output the actual command before execution in order to reduce noise in the logs.
+.SILENT: setup setup-cluster deploy-chart helm-test deploy-network destroy-test-container destroy-network test
+
+# Setup variables
 SCRIPTS_DIR=$(PWD)/scripts
 CHART_DIR=$(PWD)/../charts/hedera-network
 SCRIPT_NAME=direct-install.sh
-
 TMP_DIR=${SCRIPTS_DIR}/../temp
 
 .PHONY: all
@@ -10,16 +21,16 @@ all: setup setup-cluster reset
 
 .PHONY: setup
 setup:
-	@rm -f "${TMP_DIR}/"*.*
-	@cp "${SCRIPTS_DIR}/template.env" "${TMP_DIR}/.env"
+	rm -f "${TMP_DIR}/"*.*
+	cp "${SCRIPTS_DIR}/template.env" "${TMP_DIR}/.env"
 
 .PHONY: setup-cluster
 setup-cluster:
-	@kind create cluster -n fst
+	kind create cluster -n fst
 
 .PHONY: deploy-chart
 deploy-chart:
-	@echo ">> Deploying helm chart..." && \
+	echo ">> Deploying helm chart..." && \
 	echo "" && \
 	if [ "${SCRIPT_NAME}" = "nmt-install.sh" ]; then \
 		helm install fst ../charts/hedera-network --set defaults.root.image.repository=hashgraph/full-stack-testing/ubi8-init-dind ; \
@@ -29,14 +40,14 @@ deploy-chart:
 
 .PHONY: helm-test
 helm-test:
-	@echo "" && \
+	echo "" && \
 	echo ">> Running helm test..." && \
 	echo "" && \
 	helm test fst --logs
 
 .PHONY: deploy-network
 deploy-network: deploy-chart
-	@echo "" && \
+	echo "" && \
 	echo ">> Pod Information...." && \
 	echo "" && \
 	kubectl get pods -o wide && \
@@ -47,19 +58,27 @@ deploy-network: deploy-chart
 
 .PHONY: destroy-test-container
 destroy-test-container:
-	@echo "" && \
+	echo "" && \
 	echo ">> Deleting test container..." && \
 	kubectl delete pod network-test || true
 
 .PHONY: destroy-network
 destroy-network: destroy-test-container
-	@echo "" && \
+	echo "" && \
 	echo ">> Uninstalling helm chart..." && \
 	helm uninstall fst && \
 	sleep 10
 
 .PHONY: test
-test: setup deploy-network helm-test setup-nodes start-nodes destroy-network
+test:
+	# Enable cleanup_test function so that even if test fails, we cleanup the cluster.
+	# We are only enabling this in this make target, however if necessary, similar pattern can be used in other targets.
+	# Ref: https://stackoverflow.com/questions/28597794/how-can-i-clean-up-after-an-error-in-a-makefile
+	function cleanup_test {
+		$(MAKE) destroy-network
+	}
+	trap cleanup_test EXIT # always destroy-network on exit
+	$(MAKE) setup deploy-network helm-test setup-nodes start-nodes
 
 .PHONY: setup-nodes
 setup-nodes: setup
diff --git a/dev/scripts/helper.sh b/dev/scripts/helper.sh
index 813cbf005..989d4becd 100755
--- a/dev/scripts/helper.sh
+++ b/dev/scripts/helper.sh
@@ -640,7 +640,9 @@ function verify_network_state() {
   done
 
   if [[ "${status}" != *"${status_pattern}"* ]]; then
+    # capture the docker log in a local file for investigation
     "${KCTL}" exec "${pod}" -c root-container -- docker logs swirlds-node >"${TMP_DIR}/${pod}-swirlds-node.log"
+    echo "ERROR: <<< The network is not operational in ${pod}. >>>"
     return "${EX_ERR}"
   fi
 
@@ -661,7 +663,7 @@ function verify_node_all() {
   local node_name
   for node_name in "${NODE_NAMES[@]}"; do
     local pod="network-${node_name}-0" # pod name
-    verify_network_state "${pod}" "${MAX_ATTEMPTS}"
+    verify_network_state "${pod}" "${MAX_ATTEMPTS}" || return "${EX_ERR}"
     log_time "verify_network_state"
   done
 