From 3221f4a9097ae65ca72d449d589365e0a0db742b Mon Sep 17 00:00:00 2001
From: Jack Francis <jackfrancis@gmail.com>
Date: Tue, 26 Jan 2021 15:52:34 -0800
Subject: [PATCH] fix: don't uncordon and enable cluster-autoscaler after node
 is deleted (#42)

---
 vmss-prototype/vmss-prototype | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/vmss-prototype/vmss-prototype b/vmss-prototype/vmss-prototype
index ac2d374..6d1d46e 100755
--- a/vmss-prototype/vmss-prototype
+++ b/vmss-prototype/vmss-prototype
@@ -674,6 +674,7 @@ def vmss_prototype_update(sub_args):
                  '--overwrite'
                  ], retries=3, retry_func=not_found_no_retry)
 
+            nodeDeleted = False
             try:
                 # Gracefully stop workloads on this VM.
                 run(['kubectl', 'cordon',
@@ -729,26 +730,29 @@ def vmss_prototype_update(sub_args):
 
                 # Delete the instance to prevent idns side-effects from future VMs coming online based on its prototype
                 # See https://github.com/jackfrancis/kamino/issues/26
-                run(az(['vmss', 'delete-instances'], subscription) + [
+                stdout, stderr, exit_code = run(az(['vmss', 'delete-instances'], subscription) + [
                     '--resource-group', resource_group,
                     '--name', vmss,
                     '--instance-ids', instance_id,
                     '--no-wait'
                     ], retries=3, check=False, retry_func=not_found_no_retry)
+                if exit_code == 0:
+                    nodeDeleted = True
 
             finally:
-                # Let it be a productive member of the cluster again
-                # We ignore errors here since the VM may no longer exist
-                # We best-effort uncordon it.
-                run(['kubectl', 'uncordon',
-                     target_node
-                     ], retries=3, check=False, retry_func=not_found_no_retry)
+                if not nodeDeleted:
+                    # Let it be a productive member of the cluster again
+                    # We ignore errors here since the VM may no longer exist
+                    # We best-effort uncordon it.
+                    run(['kubectl', 'uncordon',
+                        target_node
+                        ], retries=3, check=False, retry_func=not_found_no_retry)
 
-                # Allow the cluster from scaling away the node...
-                run(['kubectl', 'annotate', 'node',
-                     target_node,
-                     'cluster-autoscaler.kubernetes.io/scale-down-disabled-'
-                     ], retries=3, check=False, retry_func=not_found_no_retry)
+                    # Allow the cluster-autoscaler to scale this node down again...
+                    run(['kubectl', 'annotate', 'node',
+                        target_node,
+                        'cluster-autoscaler.kubernetes.io/scale-down-disabled-'
+                        ], retries=3, check=False, retry_func=not_found_no_retry)
 
         # Build the image version from the snapshow we have
         output = vmss_build_sig_image(subscription, resource_group, sig_name, image_definition, version, snapshot)