From 3221f4a9097ae65ca72d449d589365e0a0db742b Mon Sep 17 00:00:00 2001 From: Jack Francis Date: Tue, 26 Jan 2021 15:52:34 -0800 Subject: [PATCH] fix: don't uncordon and enable cluster-autoscaler after node is deleted (#42) --- vmss-prototype/vmss-prototype | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/vmss-prototype/vmss-prototype b/vmss-prototype/vmss-prototype index ac2d374..6d1d46e 100755 --- a/vmss-prototype/vmss-prototype +++ b/vmss-prototype/vmss-prototype @@ -674,6 +674,7 @@ def vmss_prototype_update(sub_args): '--overwrite' ], retries=3, retry_func=not_found_no_retry) + nodeDeleted = False try: # Gracefully stop workloads on this VM. run(['kubectl', 'cordon', @@ -729,26 +730,29 @@ def vmss_prototype_update(sub_args): # Delete the instance to prevent idns side-effects from future VMs coming online based on its prototype # See https://github.com/jackfrancis/kamino/issues/26 - run(az(['vmss', 'delete-instances'], subscription) + [ + stdout, stderr, exit_code = run(az(['vmss', 'delete-instances'], subscription) + [ '--resource-group', resource_group, '--name', vmss, '--instance-ids', instance_id, '--no-wait' ], retries=3, check=False, retry_func=not_found_no_retry) + if exit_code == 0: + nodeDeleted = True finally: - # Let it be a productive member of the cluster again - # We ignore errors here since the VM may no longer exist - # We best-effort uncordon it. - run(['kubectl', 'uncordon', - target_node - ], retries=3, check=False, retry_func=not_found_no_retry) + if not nodeDeleted: + # Let it be a productive member of the cluster again + # We ignore errors here since the VM may no longer exist + # We best-effort uncordon it. + run(['kubectl', 'uncordon', + target_node + ], retries=3, check=False, retry_func=not_found_no_retry) - # Allow the cluster from scaling away the node... - run(['kubectl', 'annotate', 'node', - target_node, - 'cluster-autoscaler.kubernetes.io/scale-down-disabled-' - ], retries=3, check=False, retry_func=not_found_no_retry) + # Allow the cluster from scaling away the node... + run(['kubectl', 'annotate', 'node', + target_node, + 'cluster-autoscaler.kubernetes.io/scale-down-disabled-' + ], retries=3, check=False, retry_func=not_found_no_retry) # Build the image version from the snapshow we have output = vmss_build_sig_image(subscription, resource_group, sig_name, image_definition, version, snapshot)