From 97ddb6e287522e855faaacc007731c91873434c3 Mon Sep 17 00:00:00 2001 From: Adrian Muraru Date: Wed, 4 Mar 2020 16:01:33 +0200 Subject: [PATCH] Issue 102: ZK Ensemble fails to bootstrap if headless domain DNS resolution is failing (#129) This patch addresses issue #132. During bootstrap, observer nodes (nodes: 2,...) fail to register if the Kubernetes headless service DNS fails to come up fast enough. This patch ensures that, in such cases, the dynamic config file is not written, allowing the instance to retry when the failed zk instance is restarted (e.g. when Kubernetes reschedules the pod). Signed-off-by: Adi Muraru --- docker/bin/zookeeperStart.sh | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/docker/bin/zookeeperStart.sh b/docker/bin/zookeeperStart.sh index 34e524b05..5fc7521d0 100755 --- a/docker/bin/zookeeperStart.sh +++ b/docker/bin/zookeeperStart.sh @@ -35,29 +35,34 @@ MYID=$((ORD+1)) # Values for first startup WRITE_CONFIGURATION=true REGISTER_NODE=true -ONDISK_CONFIG=false +ONDISK_MYID_CONFIG=false +ONDISK_DYN_CONFIG=false # Check validity of on-disk configuration if [ -f $MYID_FILE ]; then EXISTING_ID="`cat $DATA_DIR/myid`" if [[ "$EXISTING_ID" == "$MYID" && -f $STATIC_CONFIG ]]; then # If Id is correct and configuration is present under `/data/conf` - ONDISK_CONFIG=true + ONDISK_MYID_CONFIG=true fi fi +if [ -f $DYNCONFIG ]; then + ONDISK_DYN_CONFIG=true +fi + # Determine if there is a ensemble available to join by checking the service domain set +e nslookup $DOMAIN if [[ $? -eq 1 ]]; then # If an nslookup of the headless service domain fails, then there is no - # active ensemble + # active ensemble yet ACTIVE_ENSEMBLE=false else ACTIVE_ENSEMBLE=true fi -if [[ "$ONDISK_CONFIG" == true ]]; then +if [[ "$ONDISK_MYID_CONFIG" == true && "$ONDISK_DYN_CONFIG" == true ]]; then # If Configuration is present, we assume, there is no need to write configuration. 
WRITE_CONFIGURATION=false else @@ -65,11 +70,11 @@ else fi if [[ "$ACTIVE_ENSEMBLE" == false ]]; then - # This is the first node being added to the cluster + # This is the first node being added to the cluster or headless service not yet available REGISTER_NODE=false else # An ensemble exists, check to see if this node is already a member. - if [[ "$ONDISK_CONFIG" == false ]]; then + if [[ "$ONDISK_MYID_CONFIG" == false || "$ONDISK_DYN_CONFIG" == false ]]; then REGISTER_NODE=true else REGISTER_NODE=false @@ -118,5 +123,11 @@ if [[ ! -d "$ZOOCFGDIR" ]]; then cp -f /conf/env.sh $ZOOCFGDIR fi -echo Starting zookeeper service -zkServer.sh --config $ZOOCFGDIR start-foreground +if [ -f $DYNCONFIG ]; then + # Node registered, start server + echo Starting zookeeper service + zkServer.sh --config $ZOOCFGDIR start-foreground +else + echo "Node failed to register!" + exit 1 +fi