#!/bin/bash
# set -x
# Abort on the first unhandled command failure.
set -e
# Text to display in the banner.
# `read -d ''` consumes the whole heredoc and returns non-zero because it never
# sees a NUL delimiter, hence the `|| true` guard under `set -e`. The default
# IFS trims the trailing newline, so the value ends with the final sentence.
read -r -d '' banner_text << 'EOF' || true
This utility will go through all Rancher users and perform an Active Directory lookup using
the configured service account to get the user's distinguished name. Next, it will perform lookups inside Rancher
for all the user's Tokens, ClusterRoleTemplateBindings, and ProjectRoleTemplateBindings. If any of those objects,
including the user object itself are referencing a principalID based on the GUID of that user, those objects will be
updated to reference the distinguished name-based principalID (unless the utility is run with -dry-run, in that case
the only results are log messages indicating the changes that would be made by a run without that flag).
This utility will also detect and correct the case where a single ActiveDirectory GUID is mapped to multiple Rancher
users. That condition was likely caused by a race in the original migration to use GUIDs and resulted in a second
Rancher user being created. This caused Rancher logins to fail for the duplicated user. The utility remedies
that situation by mapping any tokens and bindings to the original user before removing the newer user, which was
created in error.
It is also important to note that migration of ClusterRoleTemplateBindings and ProjectRoleTemplateBindings require
a delete/create operation rather than an update. This will result in new object names for the migrated bindings.
A label with the former object name will be included in the migrated bindings.
The Rancher Agent image to be used with this utility can be found at rancher/rancher-agent:v2.7.6
It is recommended that you perform a Rancher backup prior to running this utility.
EOF
# Escape sequences kept as literal backslash text; they are interpreted later
# by `echo -e` when a message is printed.
RED='\033[0;31m'
CLEAR='\033[0m'
# Cluster resources applied for the cleanup run, including the service account
# used to run the job: a ServiceAccount, a ClusterRoleBinding, the Job itself
# and the ClusterRole the binding refers to. All carry the label
# rancher-cleanup: "true".
#
# The heredoc delimiter is quoted, so nothing inside is expanded by the shell.
# Two kinds of placeholders are rewritten with sed/substitution further down:
#   * `agent_image`            - replaced with the image given on the command line
#   * `#dryrun `, `#deletemissing `, `#debug ` line prefixes - removed (marker
#     plus exactly ONE following space) to enable the corresponding env var.
#     The remaining spaces on those lines are the YAML indentation, so they
#     must line up with the `- name:` / `value:` entries of the env list.
cluster_resources_yaml=$(cat << 'EOF'
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cattle-cleanup-sa
  namespace: cattle-system
  labels:
    rancher-cleanup: "true"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cattle-cleanup-binding
  labels:
    rancher-cleanup: "true"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cattle-cleanup-role
subjects:
- kind: ServiceAccount
  name: cattle-cleanup-sa
  namespace: cattle-system
---
apiVersion: batch/v1
kind: Job
metadata:
  name: cattle-cleanup-job
  namespace: cattle-system
  labels:
    rancher-cleanup: "true"
spec:
  backoffLimit: 6
  completions: 1
  parallelism: 1
  selector:
  template:
    metadata:
      creationTimestamp: null
    spec:
      containers:
      - env:
        - name: AD_GUID_CLEANUP
          value: "true"
#dryrun         - name: DRY_RUN
#dryrun           value: "true"
#deletemissing         - name: AD_DELETE_MISSING_GUID_USERS
#deletemissing           value: "true"
#debug         - name: RANCHER_DEBUG
#debug           value: "true"
        image: agent_image
        imagePullPolicy: Always
        command: ["agent"]
        name: cleanup-agent
        resources: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
      dnsPolicy: ClusterFirst
      restartPolicy: OnFailure
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccountName: cattle-cleanup-sa
      terminationGracePeriodSeconds: 30
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cattle-cleanup-role
  labels:
    rancher-cleanup: "true"
rules:
- apiGroups:
  - '*'
  resources:
  - '*'
  verbs:
  - '*'
- nonResourceURLs:
  - '*'
  verbs:
  - '*'
EOF
)
# Agent image to use in the yaml file.
# Taken from the first raw command-line argument, before any option parsing
# has happened; empty when the script is started without arguments.
agent_image=${1-}
#######################################
# Print the command-line help, optionally preceded by a highlighted message.
# Globals:   RED, CLEAR (read) - colour escape sequences for the message
# Arguments: $1 - optional message shown (in colour) above the usage text
# Outputs:   usage text on stdout
#######################################
show_usage() {
  local message="$1"

  if [[ -n "$message" ]]; then
    # %b interprets backslash escapes the same way `echo -e` does; the
    # embedded \n plus the format's newline yield a blank separator line.
    printf '%b\n' "${RED}👉 ${message}${CLEAR}\n"
  fi

  printf '%s\n' \
    "Usage: $0 AGENT_IMAGE [OPTIONS]" \
    "" \
    "Options:"
  printf '%b\n' \
    "\t-h, --help Display this help message" \
    "\t-n, --dry-run Display the resources that would be updated without making changes" \
    "\t-d, --delete-missing Permanently remove user objects whose GUID cannot be found in Active Directory" \
    "\t-v, --debug Run with extra debug verbosity"
}
#######################################
# Print the banner text framed by full-width borders, followed by a summary
# of the selected run options.
# Globals:   dry_run, delete_missing, agent_image, debug_mode (read)
# Arguments: $1 - banner text
# Outputs:   banner and option summary on stdout
#######################################
display_banner() {
  local text="$1"
  local border_char="="
  local text_width
  local border

  # tput fails when there is no usable TERM/terminfo entry (cron, CI, plain
  # pipes). Fall back to a conventional 80 columns instead of ending up with
  # a zero-width border.
  text_width=$(tput cols 2>/dev/null) || text_width=""
  if ! [[ "$text_width" =~ ^[1-9][0-9]*$ ]]; then
    text_width=80
  fi

  # Build a run of spaces of the wanted width, then turn it into the border.
  printf -v border '%*s' "$text_width" ''
  border=${border// /$border_char}

  echo "$border"
  printf '%-*s \n' "$text_width" "$text"
  echo "$border"
  echo "Dry run: $dry_run"
  echo "Delete missing: $delete_missing"
  echo "Agent image: $agent_image"
  echo "Debug: $debug_mode"
  if [[ "$dry_run" == true && "$delete_missing" == true ]]; then
    echo "Setting the dry-run option to true overrides the delete-missing option. NO CHANGES WILL BE MADE."
  fi
  echo "$border"
}
# Parse the command line with getopt: it validates the options and moves every
# non-option argument behind a `--` separator.
# getopt runs inside the `if` condition on purpose: as a plain assignment a
# failure would trip `set -e` and abort before the usage text could be shown.
if ! OPTS=$(getopt -o hndv -l help,dry-run,delete-missing,debug -- "$@"); then
  show_usage "Invalid option"
  exit 1
fi
# getopt emits a shell-quoted argument list; eval is the documented way to
# load it back into the positional parameters.
eval set -- "$OPTS"

dry_run=false
delete_missing=false
debug_mode=false
while true; do
  case "$1" in
    -h | --help)
      show_usage
      exit 0
      ;;
    -n | --dry-run)
      dry_run=true
      shift
      ;;
    -d | --delete-missing)
      delete_missing=true
      shift
      ;;
    -v | --debug)
      debug_mode=true
      shift
      ;;
    --)
      # End of options; what follows are the positional arguments.
      shift
      break
      ;;
    *)
      show_usage "Invalid option"
      exit 1
      ;;
  esac
done

# Ensure AGENT_IMAGE is provided
if [ $# -lt 1 ]; then
  show_usage "AGENT_IMAGE is a required argument"
  exit 1
fi
# Take the image from the parsed positional arguments so that options given
# before the image (e.g. `-n IMAGE`) are not mistaken for the image itself.
agent_image="$1"
# Show what is about to happen and with which options.
display_banner "${banner_text}"

if [[ "$dry_run" != true ]]; then
  # Check the Rancher version before doing anything.
  # If it is v2.7.5, make it clear that configuration is not the recommended way to run this utility.
  rancher_version=$(kubectl get settings server-version --template='{{.value}}')
  if [[ "$rancher_version" == "v2.7.5" ]]; then
    for warning_line in \
      "IT IS NOT RECOMMENDED TO RUN THIS UTILITY AGAINST RANCHER VERSION v2.7.5" \
      "IF RANCHER v.2.7.5 RESTARTS AFTER RUNNING THIS UTILITY, IT WILL UNDO THE EFFECTS OF THIS UTILITY." \
      "IF YOU DO WANT TO RUN THIS UTILITY, IT IS RECOMMENDED THAT YOU MAKE A BACKUP PRIOR TO CONTINUING."; do
      echo -e "${RED}${warning_line}${CLEAR}"
    done
    # Extra confirmation specific to the v2.7.5 warning.
    read -p "Do you want to continue? (y/n): " choice
    case "$choice" in
      [Yy]) ;;
      *)
        echo "Exiting..."
        exit 0
        ;;
    esac
  fi
fi

# General confirmation, asked on every run (dry-run included).
read -p "Do you want to continue? (y/n): " choice
case "$choice" in
  [Yy]) ;;
  *)
    echo "Exiting..."
    exit 0
    ;;
esac
#######################################
# Render the manifests for this run from the template.
# Globals:   cluster_resources_yaml, agent_image, dry_run, delete_missing,
#            debug_mode (all read)
# Outputs:   the rendered YAML on stdout
#######################################
render_cleanup_manifests() {
  local rendered

  # Insert the provided rancher agent image. Parameter expansion with a quoted
  # replacement keeps characters such as '=', '&' or '\' in the image
  # reference literal, which splicing the value into a sed program would not.
  rendered=${cluster_resources_yaml//agent_image/"$agent_image"}

  if [[ "$dry_run" == true ]]; then
    # Uncomment the env var for dry-run mode
    rendered=$(sed -e 's/#dryrun //' <<< "$rendered")
  elif [[ "$delete_missing" == true ]]; then
    # Uncomment the env var for missing user cleanup
    # (dry-run takes precedence, so this is only reached on a real run).
    rendered=$(sed -e 's/#deletemissing //' <<< "$rendered")
  fi

  if [[ "$debug_mode" == true ]]; then
    # Uncomment the env var for debug logging
    rendered=$(sed -e 's/#debug //' <<< "$rendered")
  fi

  printf '%s\n' "$rendered"
}

# Render once and keep the result in `yaml`: the same text is applied here
# and reused later in the script.
yaml=$(render_cleanup_manifests)
printf '%s\n' "$yaml" | kubectl apply -f -
# Get the pod ID to tail the logs.
# The pod may not exist yet right after the job is created, so poll for it.
retry_interval=1
max_retries=10
retry_count=0
pod_id=""
while (( retry_count < max_retries )); do
  # The lookup fails while the job has no pod yet (index 0 of an empty list).
  # Treat that as "not found yet" instead of letting `set -e` end the script.
  pod_id=$(kubectl --namespace=cattle-system get pod -l job-name=cattle-cleanup-job \
    -o jsonpath="{.items[0].metadata.name}") || pod_id=""
  if [[ -n "$pod_id" ]]; then
    break
  fi
  sleep "$retry_interval"
  # Plain assignment rather than ((retry_count++)): the post-increment
  # evaluates to 0 on the first pass, which is a failing status under `set -e`.
  retry_count=$((retry_count + 1))
done

if [[ -z "$pod_id" ]]; then
  echo "No pod found for job cattle-cleanup-job after ${max_retries} attempts, check the job by running kubectl --namespace=cattle-system get jobs" >&2
  echo "To cleanup manually, you can run:" >&2
  echo "  kubectl --namespace=cattle-system delete serviceaccount,job -l rancher-cleanup=true" >&2
  echo "  kubectl delete clusterrole,clusterrolebinding -l rancher-cleanup=true" >&2
  exit 1
fi
# Follow the pod's logs; `kubectl logs -f` fails while the container is not
# ready to serve logs, so retry until it succeeds or the timeout is reached.
# 600 is equal to 5 minutes, because the sleep interval is 0.5 seconds
# (time spent inside each failed kubectl call comes on top of that).
job_start_timeout=600
declare -i count=0
until kubectl --namespace=cattle-system logs "$pod_id" -f; do
  if (( count > job_start_timeout )); then
    echo "Timeout reached, check the job by running kubectl --namespace=cattle-system get jobs"
    echo "To cleanup manually, you can run:"
    echo " kubectl --namespace=cattle-system delete serviceaccount,job -l rancher-cleanup=true"
    echo " kubectl delete clusterrole,clusterrolebinding -l rancher-cleanup=true"
    exit 1
  fi
  sleep 0.5
  # count is declared as an integer, so += adds instead of appending.
  count+=1
done
# Cleanup once the log stream has ended: delete every resource that was
# created from the rendered manifests.
kubectl delete -f - <<< "$yaml"