From 84f77c8fce05de4ef3d03efbd7fb45bbef017cad Mon Sep 17 00:00:00 2001
From: Jacob Blain Christen
Date: Wed, 16 Sep 2020 10:14:00 -0700
Subject: [PATCH] drain: guidance for avoiding hung upgrades (#104)

- updated readme
- updated k3s-upgrade example
---
 README.md                 | 13 +++++++------
 examples/k3s-upgrade.yaml |  7 ++++---
 pkg/apis/go.mod           |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 414acdc5..59b2452c 100644
--- a/README.md
+++ b/README.md
@@ -123,16 +123,17 @@ spec:
     command: [sh, -c]
     args: ["echo '### ENV ###'; env | sort; echo '### RUN ###'; find /run/system-upgrade | sort"]

-  # If left unspecified, no drain will be performed
-  # See https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/#use-kubectl-drain-to-remove-a-node-from-service
+  # If left unspecified, no drain will be performed.
+  # See:
+  # - https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/
+  # - https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands#drain
   drain:
     # deleteLocalData: true # default
     # ignoreDaemonSets: true # default
     force: true
-    #disableEviction: false # default
-    #disableEviction flag option is only available in kubectl v1.18 or later, to force drain pods with pod disruption budget.
-    #skipWaitForDeleteTimeout : 0 # default
-    #skipWaitForDeleteTimeout flag option is only available in kubectl v1.18 or later, If pod DeletionTimestamp older than N seconds, skip waiting for the pod. Seconds must be greater than 0 to skip.
+    # Use `disableEviction == true` and/or `skipWaitForDeleteTimeout > 0` to prevent upgrades from hanging on small clusters.
+    # disableEviction: false # default, only available with kubectl >= 1.18
+    # skipWaitForDeleteTimeout: 0 # default, only available with kubectl >= 1.18

   # If `drain` is specified, the value for `cordon` is ignored.
   # If neither `drain` nor `cordon` are specified and the node is marked as `schedulable=false` it will not be marked as `schedulable=true` when the apply job completes.
diff --git a/examples/k3s-upgrade.yaml b/examples/k3s-upgrade.yaml
index c9a1a768..2c8721f6 100644
--- a/examples/k3s-upgrade.yaml
+++ b/examples/k3s-upgrade.yaml
@@ -10,7 +10,7 @@ metadata:
     k3s-upgrade: server
 spec:
   concurrency: 1
-  version: v1.17.4+k3s1
+  version: v1.18.8+k3s1
   nodeSelector:
     matchExpressions:
     - {key: k3s-upgrade, operator: Exists}
@@ -33,8 +33,8 @@ metadata:
   labels:
     k3s-upgrade: agent
 spec:
-  concurrency: 2
-  version: v1.17.4+k3s1
+  concurrency: 2 # in general, this should be the number of workers - 1
+  version: v1.18.8+k3s1
   nodeSelector:
     matchExpressions:
     - {key: k3s-upgrade, operator: Exists}
@@ -50,5 +50,6 @@ spec:
     args: ["prepare", "k3s-server"]
   drain:
     force: true
+    skipWaitForDeleteTimeout: 60 # set this to prevent upgrades from hanging on small clusters since k8s v1.18
   upgrade:
     image: rancher/k3s-upgrade
diff --git a/pkg/apis/go.mod b/pkg/apis/go.mod
index bbe700e7..b07f7488 100644
--- a/pkg/apis/go.mod
+++ b/pkg/apis/go.mod
@@ -3,7 +3,7 @@ module github.com/rancher/system-upgrade-controller/pkg/apis
 go 1.14

 require (
-	github.com/rancher/wrangler v0.5.4-0.20200326191509-4054411d9736
+	github.com/rancher/wrangler v0.6.1
 	github.com/sirupsen/logrus v1.4.2
 	k8s.io/apimachinery v0.18.0
 )
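
For reference, below is a minimal sketch of a Plan that combines the drain options this change documents (`disableEviction`, `skipWaitForDeleteTimeout`) with `force`. It is illustrative only: the plan name, namespace, `serviceAccountName`, channel URL, and the 60-second value are assumptions, not part of this patch.

```yaml
# Hypothetical Plan showing the drain options discussed above.
# name, namespace, serviceAccountName, and channel are placeholders; adjust for your cluster.
apiVersion: upgrade.cattle.io/v1
kind: Plan
metadata:
  name: example-agent-plan     # placeholder
  namespace: system-upgrade    # placeholder
spec:
  concurrency: 1
  channel: https://update.k3s.io/v1-release/channels/stable  # assumed k3s release channel
  serviceAccountName: system-upgrade                          # placeholder
  nodeSelector:
    matchExpressions:
    - {key: k3s-upgrade, operator: Exists}
  drain:
    force: true
    # Both options require kubectl >= 1.18 (see the README note above):
    disableEviction: true          # delete pods directly instead of evicting, bypassing PodDisruptionBudgets
    skipWaitForDeleteTimeout: 60   # stop waiting for pods whose DeletionTimestamp is older than 60s
  upgrade:
    image: rancher/k3s-upgrade
```

Note that `disableEviction: true` sidesteps PodDisruptionBudget checks entirely, so on clusters where PDBs should still be honored it may be preferable to rely on `skipWaitForDeleteTimeout` alone, as the updated k3s-upgrade example does.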