Adding * for index to get metadata name (#1005)

Co-authored-by: Jayasimha Raghavan <[email protected]>
unskript · Mar 1, 2024 · 024478a · 024478a
1 parent 64b1247
commit 024478a
Showing 1 changed file with 87 additions and 44 deletions.
diff --git a/Kubernetes/legos/k8s_check_service_pvc_utilization/k8s_check_service_pvc_utilization.py b/Kubernetes/legos/k8s_check_service_pvc_utilization/k8s_check_service_pvc_utilization.py
@@ -97,76 +97,119 @@ def k8s_check_service_pvc_utilization(handle, service_name: str = "", namespace:
         labels_dict = json.loads(response.stdout.replace("'", "\""))
         label_selector = ",".join([f"{k}={v}" for k, v in labels_dict.items()])
 
-        # Fetch the pod attached to this service
-        get_pod_command = f"kubectl get pods -n {namespace} -l {label_selector} -o=jsonpath='{{.items[0].metadata.name}}'"
+        # Fetch the pod attached to this service.
+        # The safer option is to use the [*] wildcard index. A specific index such as 0 or 1
+        # can lead to an ApiException.
+        get_pod_command = f"kubectl get pods -n {namespace} -l {label_selector} -o=jsonpath='{{.items[*].metadata.name}}'"
         response = handle.run_native_cmd(get_pod_command)
         if not response or response.stderr:
             raise ApiException(f"Error while executing command ({get_pod_command}): {response.stderr if response else 'empty response'}")
 
-        pod_name = response.stdout.strip()
-        if not pod_name:
-            print(f"No pods found for service {svc} in namespace {namespace} with labels {label_selector}")
+        # pod_names stores the output of the kubectl command above: a space-separated list of pod names
+        pod_names = response.stdout.strip()
+        if not pod_names:
+            # No pods found for service {svc} in namespace {namespace} with labels {label_selector}
             continue
 
         # Fetch PVCs attached to the pod
-        get_pvc_names_command = f"kubectl get pod {pod_name} -n {namespace} -o=jsonpath='{{.spec.volumes[*].persistentVolumeClaim.claimName}}'"
+        # The kubectl command above returns a string of space-separated pod name(s).
+        # Given such a string, let's find out whether it holds one pod name or more than one.
+        # If there is more than one pod name in the output, we need to iterate over the items[] array;
+        # otherwise we can access the persistentVolumeClaim name directly.
+        # Let's also associate each pod name with its claim name (PVC name) in the format
+        # pod_name:pv_claim_name
+
+        if len(pod_names.split()) > 1:
+            json_path_cmd = "{range .items[*]}{.metadata.name}:{range .spec.volumes[*].persistentVolumeClaim}{.claimName} {end}{\"\\n\"}{end}"
+        else:
+            json_path_cmd = "{.metadata.name}:{range .spec.volumes[*].persistentVolumeClaim}{.claimName}{end}"
+
+        get_pvc_names_command = f"kubectl get pod {pod_names} -n {namespace} -o=jsonpath='{json_path_cmd}'"
+
+
         response = handle.run_native_cmd(get_pvc_names_command)
         if not response or response.stderr:
             raise ApiException(f"Error while executing command ({get_pvc_names_command}): {response.stderr if response else 'empty response'}")
-        pvc_names = response.stdout.strip().split()
-
-        # If there are no PVCs for this service, continue to the next one
-        if not pvc_names:
+        # Example: ['lightbeam-elasticsearch-master-0:data-lightbeam-elasticsearch-master-0']
+        pod_and_pvc_names = response.stdout.strip().split()
+
+
+        # If pod_and_pvc_names is empty, record the service as having no PVCs and move on
+        if not pod_and_pvc_names:
             services_without_pvcs.append(svc)
             continue
 
-        # Fetch the Pod JSON
-        get_pod_json_command = f"kubectl get pod {pod_name} -n {namespace} -o json"
-        pod_json_output = handle.run_native_cmd(get_pod_json_command)
-        if not pod_json_output or pod_json_output.stderr:
-            raise ApiException(f"Error fetching pod json for {pod_name}: {pod_json_output.stderr if pod_json_output else 'empty response'}")
-        pod_data = json.loads(pod_json_output.stdout)
-
-        pvc_mounts = [
-            {"container_name": container['name'],
-            "mount_path": mount['mountPath'],
-            "pvc_name": volume['persistentVolumeClaim']['claimName']}
-            for container in pod_data['spec']['containers']
-            for mount in container.get('volumeMounts', [])
-            for volume in pod_data['spec']['volumes']
-            if 'persistentVolumeClaim' in volume and volume['name'] == mount['name']
-        ]
-
+        pvc_mounts = []
         alert_pvcs = []
         all_pvcs = []
+
+        for element in pod_and_pvc_names:
+            pod_name, claim_name = element.split(':')
+            if not claim_name:
+                # Skip if Volume Claim name is empty.
+                continue 
+
+            # Fetch the Pod JSON 
+            # We need to get the container name (if any) from the Pod's JSON. This is needed
+            # if we want to exec into the POD that is within a container. The JSON data that
+            # we obtain is used to fill the pvc_mounts list, which is a list of dictionaries.
+            # We use this pvc_mounts to find out the used_space percentage. We compare that with
+            # the threshold to flag if the utilization is above threshold. 
+            # df -kh is the command used to get the disk utilization. This is accurate as we get
+            # the disk utilization from the POD directly, rather than checking the resource limit
+            # and resource request from the deployment / stateful YAML file. 
+            get_pod_json_command = f"kubectl get pod {pod_name} -n {namespace} -o json"
+            pod_json_output = handle.run_native_cmd(get_pod_json_command)
+            if not pod_json_output or pod_json_output.stderr:
+                raise ApiException(f"Error fetching pod json for {pod_name}: {pod_json_output.stderr if pod_json_output else 'empty response'}")
+            pod_data = json.loads(pod_json_output.stdout)
+
+            # Using dict.get() with a default value is a way of handling missing keys
+            for container in pod_data.get('spec', {}).get('containers', {}):
+                for mount in container.get('volumeMounts', {}):
+                    for volume in pod_data.get('spec', {}).get('volumes', {}):
+                        if 'persistentVolumeClaim' in volume and volume.get('name') == mount.get('name'):
+                            try:
+                                claim_name = volume['persistentVolumeClaim']['claimName']
+                                pvc_mounts.append({
+                                    "container_name": container['name'],
+                                    "mount_path": mount['mountPath'],
+                                    "pvc_name": claim_name if claim_name else None
+                                })
+                            except KeyError as e:
+                                # Handle the KeyError (e.g., log the error, skip this iteration, etc.)
+                                print(f"KeyError: {e}. Skipping this entry.")
+                            except IndexError as e:
+                                # Handle the IndexError (e.g., log the error, skip this iteration, etc.)
+                                print(f"IndexError: {e}. Skipping this entry.")
 
+
+        all_mounts = [mount.get('mount_path') for mount in pvc_mounts]
+        all_mounts = " ".join(all_mounts).strip()
         for mount in pvc_mounts:
             container_name = mount['container_name']
             mount_path = mount['mount_path']
             pvc_name = mount['pvc_name']
             all_pvcs.append({"pvc_name": pvc_name, "mount_path": mount_path, "used": None, "capacity": None})
 
-        all_mounts = [mount.get('mount_path') for mount in pvc_mounts]
-        all_mounts = " ".join(all_mounts).strip()
-        du_command = f"kubectl exec -n {namespace} {pod_name} -c {container_name} -- df -kh {all_mounts} | grep -v Filesystem"
-        du_output = handle.run_native_cmd(du_command)
-
-
-        if du_output and not du_output.stderr:
-            used_space = du_output.stdout.strip()
-            for idx, space in enumerate([used_space]):
-                space = space.split()
-                used_percentage = int(space[-2].replace('%', ''))
-                total_capacity_str = space[1].replace('%', '')
-                all_pvcs[idx]["used"] = used_percentage
-                all_pvcs[idx]["capacity"] =  total_capacity_str
-                if used_percentage > threshold:
-                    alert_pvcs.append(all_pvcs[idx])
+            du_command = f"kubectl exec -n {namespace} {pod_name} -c {container_name} -- df -kh {all_mounts} | grep -v Filesystem"
+            du_output = handle.run_native_cmd(du_command)
+
+            if du_output and not du_output.stderr:
+                used_space = du_output.stdout.strip()
+                for idx, space in enumerate([used_space]):
+                    space = space.split()
+                    used_percentage = int(space[-2].replace('%', ''))
+                    total_capacity_str = space[1].replace('%', '')
+                    all_pvcs[idx]["used"] = used_percentage
+                    all_pvcs[idx]["capacity"] =  total_capacity_str
+                    if used_percentage > threshold:
+                        alert_pvcs.append(all_pvcs[idx])
 
         alert_pvcs_all_services.extend(alert_pvcs)
     if services_without_pvcs:
         print("Following services do not have any PVCs attached:")
         for service in services_without_pvcs:
             print(f"- {service}")
 
-    return (not bool(alert_pvcs_all_services), alert_pvcs_all_services)
+    return (not bool(alert_pvcs_all_services), alert_pvcs_all_services)