# 02-kured.yaml
#
# NOTE: the repository this manifest was taken from was archived by its owner
# on Oct 25, 2023 and is now read-only.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: kube-system
  name: kured-config
# Slack notification settings. The kured DaemonSet in this file loads each key
# into an environment variable of the same name via `configMapKeyRef`.
data:
  # `<...>` is a placeholder; substitute the real incoming-webhook path.
  # NOTE(review): a webhook URL is presumably sensitive - consider whether it
  # belongs in a Secret rather than a ConfigMap.
  KURED_SLACK_URL: 'https://hooks.slack.com/services/<...>'
  # Values passed to kured's --slack-username and --slack-channel flags.
  KURED_SLACK_NAME: 'kured'
  KURED_SLACK_CHANNEL: 'alerting'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kured
rules:
  # Cluster-wide permissions:
  #   * kured reads spec.unschedulable from nodes
  #   * the bundled kubectl drains and uncordons nodes
  #
  # NB: this rule set is tightly coupled to the bundled version of kubectl;
  # the rules below match
  # https://github.com/kubernetes/kubernetes/blob/v1.19.4/staging/src/k8s.io/kubectl/pkg/cmd/drain/drain.go
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - patch
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - list
      - delete
      - get
  - apiGroups:
      - apps
    resources:
      - daemonsets
    verbs:
      - get
  - apiGroups:
      - ""
    resources:
      - pods/eviction
    verbs:
      - create
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kured
# Grants the cluster-scoped `kured` ClusterRole to the `kured` ServiceAccount
# in kube-system.
subjects:
  - kind: ServiceAccount
    namespace: kube-system
    name: kured
roleRef:
  kind: ClusterRole
  name: kured
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: kured
  namespace: kube-system
rules:
  # Allow kured to lock/unlock itself: `update` is granted on the DaemonSet
  # named `kured` only.
  - apiGroups:
      - apps
    resources:
      - daemonsets
    resourceNames:
      - kured
    verbs:
      - update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: kured
  namespace: kube-system
# Grants the namespaced `kured` Role to the `kured` ServiceAccount in
# kube-system.
roleRef:
  kind: Role
  name: kured
  apiGroup: rbac.authorization.k8s.io
subjects:
  - kind: ServiceAccount
    name: kured
    namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
# Identity referenced by the kured DaemonSet (`serviceAccountName`) and by the
# RoleBinding / ClusterRoleBinding subjects in this file.
metadata:
  namespace: kube-system
  name: kured
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kured  # Must match `--ds-name`
  namespace: kube-system  # Must match `--ds-namespace`
spec:
  selector:
    matchLabels:
      name: kured
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: kured
    spec:
      serviceAccountName: kured
      # Tolerate the control-plane NoSchedule taint under both its legacy
      # ("master") and its newer ("control-plane") key, so the pod can be
      # scheduled on nodes carrying either taint.
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
      hostPID: true  # Facilitate entering the host mount namespace via init
      restartPolicy: Always
      containers:
        - name: kured
          # If you find yourself here wondering why there is no
          # :latest tag on Docker Hub, see the FAQ in the README
          image: docker.io/weaveworks/kured:1.6.1
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true  # Give permission to nsenter /proc/1/ns/mnt
          env:
            # Pass in the name of the node on which this pod is scheduled
            # for use with drain/uncordon operations and lock acquisition
            - name: KURED_NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Slack settings, sourced from the `kured-config` ConfigMap and
            # expanded into the container args below.
            - name: KURED_SLACK_URL
              valueFrom:
                configMapKeyRef:
                  name: kured-config
                  key: KURED_SLACK_URL
            - name: KURED_SLACK_NAME
              valueFrom:
                configMapKeyRef:
                  name: kured-config
                  key: KURED_SLACK_NAME
            - name: KURED_SLACK_CHANNEL
              valueFrom:
                configMapKeyRef:
                  name: kured-config
                  key: KURED_SLACK_CHANNEL
          command:
            - /usr/bin/kured
          args:
            - "--slack-hook-url=$(KURED_SLACK_URL)"
            - "--slack-username=$(KURED_SLACK_NAME)"
            - "--slack-channel=$(KURED_SLACK_CHANNEL)"
            # Optional flags; uncomment and adjust as needed.
            # - --alert-filter-regexp=^RebootRequired$
            # - --blocking-pod-selector=runtime=long,cost=expensive
            # - --blocking-pod-selector=name=temperamental
            # - --blocking-pod-selector=...
            # - --ds-name=kured
            # - --ds-namespace=kube-system
            # - --end-time=23:59:59
            # - --lock-annotation=weave.works/kured-node-lock
            # - --period=1h
            # - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
            # - --reboot-days=sun,mon,tue,wed,thu,fri,sat
            # - --reboot-sentinel=/var/run/reboot-required
            # - --message-template-drain=Draining node %s
            # - --message-template-reboot=Rebooting node %s
            # - --start-time=0:00
            # - --time-zone=UTC