From c83db4864768ac9fbcf625b5f9fc3cd7ae491740 Mon Sep 17 00:00:00 2001 From: Michael Sinz <36865706+Michael-Sinz@users.noreply.github.com> Date: Thu, 10 Jun 2021 13:27:20 -0700 Subject: [PATCH] Add a job TTL such that they don't hang around forever (#75) * Fix comment line location - it somehow got into the wrong spot. * Add a job TTL such that they don't hang around forever After a job completes, the pod does not need to stay on the cluster as a "completed" pod forever. It just takes up kubernetes state space when it has long since completed. Keeping it around for a while after completing allows for inspection but once that time has been reached, it should evaporate and no longer take up state space. (Jobs don't restart once completed so they are not very useful and in larger clusters the state space becomes a bottleneck) --- helm/vmss-prototype/Chart.yaml | 2 +- helm/vmss-prototype/templates/vmss-prototype.yaml | 1 + helm/vmss-prototype/values.yaml | 6 ++++++ vmss-prototype/vmss-prototype | 2 +- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/helm/vmss-prototype/Chart.yaml b/helm/vmss-prototype/Chart.yaml index ac97791..ceb3b52 100644 --- a/helm/vmss-prototype/Chart.yaml +++ b/helm/vmss-prototype/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: A Helm chart for the Kamino vmss-prototype pattern image generator name: vmss-prototype -version: 0.0.12 +version: 0.0.13 maintainers: - name: Michael Sinz email: msinz@microsoft.com diff --git a/helm/vmss-prototype/templates/vmss-prototype.yaml b/helm/vmss-prototype/templates/vmss-prototype.yaml index 2f82a58..989686f 100644 --- a/helm/vmss-prototype/templates/vmss-prototype.yaml +++ b/helm/vmss-prototype/templates/vmss-prototype.yaml @@ -48,6 +48,7 @@ spec: {{- end }} # This is indented like it is under either the Job.spec or CronJob.spec.jobTemplate.spec + ttlSecondsAfterFinished: {{ .Values.kamino.jobTtl }} template: metadata: labels: diff --git a/helm/vmss-prototype/values.yaml b/helm/vmss-prototype/values.yaml index c30a000..dfa9f11 100644 --- a/helm/vmss-prototype/values.yaml +++ b/helm/vmss-prototype/values.yaml @@ -21,6 +21,12 @@ kamino: # Minimum is 2. imageHistory: 3 + # Number of seconds after the job completes before it is cleaned up + # see https://kubernetes.io/docs/concepts/workloads/controllers/job/#ttl-mechanism-for-finished-jobs + # This has it clean up the pod from the cluster within an hour, just to + # help reduce left over state in the cluster. + jobTtl: 3600 + drain: # Drain grace period is the maximum time to allow pods to drain load # and leave the node. The default of 300 seconds is relatively long diff --git a/vmss-prototype/vmss-prototype b/vmss-prototype/vmss-prototype index 8d2b9a7..85ef44f 100755 --- a/vmss-prototype/vmss-prototype +++ b/vmss-prototype/vmss-prototype @@ -401,6 +401,7 @@ def image_tweaks(node_name): # Give kubernetes a few moments to notice we are running # as the rest of this stuff really just happens quickly '/bin/sleep 4', + # Update an ancestry.log '/bin/echo "$(/bin/date) VMSS-Prototype Donor: $(/bin/hostname)" >> /var/log/ancestry.log', # Multiple lines so it is easier to read all of the different # items we are cleaning up (removing) @@ -420,7 +421,6 @@ def image_tweaks(node_name): ' /var/lib/waagent/GoalState.*.xml' ' /var/lib/waagent/*.manifest.xml' '', - # Update an ancestry.log # This forces the machine-id to be re-issued '/bin/cp /dev/null /etc/machine-id', # Finally, we need to power off now