Add keys to checks result (#1053)
shloka-bhalgat-unskript authored May 29, 2024
1 parent d359431 commit 84e7a51
Showing 22 changed files with 125 additions and 139 deletions.
@@ -62,6 +62,7 @@ def aws_check_ssl_certificate_expiry(handle, threshold_days: int, region: str,)
right_now = datetime.datetime.now(dateutil.tz.tzlocal())
diff = expiry_date-right_now
days_remaining = diff.days
days = 0
if 0 < days_remaining < threshold_days:
days = days_remaining
elif days_remaining < 0:
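For reference, a minimal standalone sketch of the expiry-window arithmetic this hunk touches, with `days` pre-initialized as in the added line. The value used for an already-expired certificate is an assumption, since that branch is not shown in the diff.

import datetime
import dateutil.tz

def days_until_expiry(expiry_date: datetime.datetime, threshold_days: int) -> int:
    # Pre-initialize `days` so it is always defined, matching the added line above.
    right_now = datetime.datetime.now(dateutil.tz.tzlocal())
    days_remaining = (expiry_date - right_now).days
    days = 0
    if 0 < days_remaining < threshold_days:
        days = days_remaining
    elif days_remaining < 0:
        days = -1  # assumption: a negative marker for an already-expired certificate
    return days
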
7 changes: 6 additions & 1 deletion AWS/legos/aws_ebs_modify_volume/aws_ebs_modify_volume.py
@@ -74,10 +74,15 @@ def aws_ebs_modify_volume(
# Get the current volume size.
Volume = ec2Resource.Volume(volume_id)
currentSize = Volume.size
newSize = None

if resize_option == SizingOption.Add:
newSize = currentSize + resize_value
elif resize_option == SizingOption.Mutiple:
elif resize_option == SizingOption.Multiple:
newSize = currentSize * resize_value
else:
raise ValueError(f"Invalid resize option: {resize_option}")


print(f'CurrentSize {currentSize}, NewSize {newSize}')

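A hedged, self-contained sketch of the resize arithmetic above; the SizingOption enum values here are illustrative stand-ins for the lego's actual enum.

from enum import Enum

class SizingOption(str, Enum):
    Add = "add"            # grow by a fixed number of GiB
    Multiple = "multiple"  # scale by a factor

def compute_new_size(current_size: int, resize_option: SizingOption, resize_value: int) -> int:
    if resize_option == SizingOption.Add:
        return current_size + resize_value
    elif resize_option == SizingOption.Multiple:
        return current_size * resize_value
    raise ValueError(f"Invalid resize option: {resize_option}")

print(compute_new_size(100, SizingOption.Add, 20))       # 120
print(compute_new_size(100, SizingOption.Multiple, 2))   # 200
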
@@ -58,6 +58,7 @@ def aws_ecs_detect_failed_deployment(handle, cluster_name: str, service_name: st
return ["Empty deployment"]

deploymentInProgress = False
primaryDeploymentID = ""
for deployment in deployments:
if deployment['status'] == "PRIMARY":
primaryDeploymentID = deployment['id']
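A small hedged sketch of why `primaryDeploymentID` is now pre-initialized: if no PRIMARY deployment is present, the variable would otherwise be undefined after the loop. The sample deployment dicts are made up.

deployments = [
    {"id": "ecs-svc/111", "status": "ACTIVE"},
    {"id": "ecs-svc/222", "status": "PRIMARY"},
]

primaryDeploymentID = ""  # defined even when no PRIMARY deployment exists
for deployment in deployments:
    if deployment["status"] == "PRIMARY":
        primaryDeploymentID = deployment["id"]

print(primaryDeploymentID)  # ecs-svc/222
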
@@ -27,38 +27,33 @@ def elasticsearch_check_health_status(handle, unassigned_shards:int = 20) -> Tup
:rtype: Result Tuple of result
"""
cluster_health = {}
output = handle.web_request("/_cluster/health?pretty", "GET", None)


# Early return if cluster status is green
if output['status'] == 'green':
return (True, None)

cluster_health['cluster_name'] = output['cluster_name']
cluster_health['status'] = output['status']

# Additional checks for both red and yellow statuses
additional_details = False

cluster_health = {
"cluster_name": output['cluster_name'],
"status": output['status'],
"unassigned_shards": output['unassigned_shards']
}

# Check for significant health issues
if output['unassigned_shards'] > unassigned_shards:
cluster_health['unassigned_shards'] = output['unassigned_shards']
additional_details = True

if output['delayed_unassigned_shards'] > 0:
cluster_health['delayed_unassigned_shards'] = output['delayed_unassigned_shards']
additional_details = True

if output['initializing_shards'] > 0 or output['relocating_shards'] > 0:
cluster_health['initializing_shards'] = output['initializing_shards']
cluster_health['relocating_shards'] = output['relocating_shards']
additional_details = True

if output['number_of_nodes'] != output['number_of_data_nodes']:
cluster_health['number_of_nodes'] = output['number_of_nodes']
cluster_health['number_of_data_nodes'] = output['number_of_data_nodes']
additional_details = True

# If status is red, return the result immediately
if output['status'] == 'red' or (output['status'] == 'yellow' and additional_details):
return (False, [cluster_health]) # Return immediately if unassigned shards exceed the threshold

# Additional checks for severe conditions
if output['status'] == 'red' or output['delayed_unassigned_shards'] > 0 or output['initializing_shards'] > 0 or output['relocating_shards'] > 0 or output['number_of_nodes'] != output['number_of_data_nodes']:
additional_details = {
"delayed_unassigned_shards": output['delayed_unassigned_shards'],
"initializing_shards": output['initializing_shards'],
"relocating_shards": output['relocating_shards'],
"number_of_nodes": output['number_of_nodes'],
"number_of_data_nodes": output['number_of_data_nodes']
}
cluster_health.update(additional_details)
return (False, [cluster_health])

# If status is yellow but no additional conditions are met, return as healthy
# If status is yellow but no additional critical issues, consider it healthy
return (True, None)
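A hedged, standalone sketch mirroring the consolidated failure condition above; the sample `output` dict imitates the `/_cluster/health` fields the check reads.

def summarize_cluster_health(output: dict) -> tuple:
    # Mirror of the restructured check: green is healthy, red or any severe
    # shard/node imbalance fails, yellow without critical issues is healthy.
    if output["status"] == "green":
        return (True, None)
    cluster_health = {
        "cluster_name": output["cluster_name"],
        "status": output["status"],
        "unassigned_shards": output["unassigned_shards"],
    }
    if (output["status"] == "red"
            or output["delayed_unassigned_shards"] > 0
            or output["initializing_shards"] > 0
            or output["relocating_shards"] > 0
            or output["number_of_nodes"] != output["number_of_data_nodes"]):
        cluster_health.update({
            "delayed_unassigned_shards": output["delayed_unassigned_shards"],
            "initializing_shards": output["initializing_shards"],
            "relocating_shards": output["relocating_shards"],
            "number_of_nodes": output["number_of_nodes"],
            "number_of_data_nodes": output["number_of_data_nodes"],
        })
        return (False, [cluster_health])
    return (True, None)

sample = {"cluster_name": "demo", "status": "yellow", "unassigned_shards": 3,
          "delayed_unassigned_shards": 0, "initializing_shards": 0,
          "relocating_shards": 2, "number_of_nodes": 3, "number_of_data_nodes": 3}
print(summarize_cluster_health(sample))  # (False, [...]) because shards are relocating
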
@@ -39,14 +39,14 @@ def elasticsearch_compare_cluster_disk_size_to_threshold(handle, threshold: floa
# Split the lines and skip the header
lines = allocation_output.splitlines()[1:]

result = []
# Find the max disk percentage from the lines, considering only assigned nodes
max_disk_percent = max(float(line.split()[5]) for line in lines if "UNASSIGNED" not in line)

if max_disk_percent > threshold:
result.append({"usage_disk_percentage": max_disk_percent, "threshold": threshold})

if len(result) != 0:
return (False, result)
# Calculate the max disk percentage from the lines, considering only assigned nodes
max_disk_percent = 0 # Initialize to 0 or an appropriately low number
for line in lines:
if "UNASSIGNED" not in line:
disk_usage = float(line.split()[5])
max_disk_percent = max(max_disk_percent, disk_usage)
if max_disk_percent > threshold:
result = [{"usage_disk_percentage": max_disk_percent, "threshold": threshold}]
return (False, result)
return (True, None)
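A hedged, standalone sketch of the max-disk-percent scan introduced above; the `_cat/allocation` table is made up to show that column 6 (disk.percent) is the value parsed, and UNASSIGNED rows are skipped.

allocation_output = (
    "shards disk.indices disk.used disk.avail disk.total disk.percent host ip node\n"
    "    10        1.2gb      20gb      80gb     100gb          20 10.0.0.1 10.0.0.1 node-1\n"
    "     8        0.9gb      75gb      25gb     100gb          75 10.0.0.2 10.0.0.2 node-2\n"
    "     2 UNASSIGNED\n"
)

threshold = 70.0
lines = allocation_output.splitlines()[1:]  # skip the header row
max_disk_percent = 0.0
for line in lines:
    if "UNASSIGNED" in line or not line.strip():
        continue
    max_disk_percent = max(max_disk_percent, float(line.split()[5]))

if max_disk_percent > threshold:
    print([{"usage_disk_percentage": max_disk_percent, "threshold": threshold}])
else:
    print("Disk usage within threshold")
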

@@ -51,59 +51,29 @@ def elasticsearch_get_index_health(handle, index_name="") -> Tuple:
:return: A list of dictionaries where each dictionary contains stats about each index
"""
try:
indices_output = []
# If no specific index is provided, get all indices
if len(index_name)==0 :
indices_output = handle.web_request("/_cat/indices?h=index", "GET", None)
indices_output = ''.join(indices_output).split('\n')
# If a specific index is provided, only consider that index
if index_name:
# Fetch specific index health
health_url = f"/_cat/indices/{index_name}?v&h=index,status&format=json"
health_response = handle.web_request(health_url, "GET", None)
index_stats = [health_response[0]] if health_response and 'error' not in health_response else []
else:
indices_output.append(index_name)

all_indices_stats = []

for current_index in indices_output:
# Skip system indices
if current_index.startswith('.'):
continue
index_stats = {}

# Get settings for the current index
settings_output = handle.web_request(f"/{current_index}/_settings", "GET", None)
if "error" in settings_output:
print(f"Error for settings of {current_index}")
continue

# Get stats for the current index
stats_output = handle.web_request(f"/{current_index}/_stats", "GET", None)
if "error" in stats_output:
print(f"Error for stats of {current_index}")
continue

# Get any tasks associated with the current index
tasks_output = handle.web_request(f"/_tasks?actions=*{current_index}*&detailed", "GET", None)

# Get health of the current index
health_output = handle.web_request(f"/_cat/indices/{current_index}?format=json", "GET", None)
if "error" in health_output:
print(f"Error for health of {current_index}")
continue

if settings_output:
settings = settings_output.get(current_index, {}).get('settings', {}).get('index', {})
else:
settings = {}
# Consolidate stats for the current index
if health_output[0]['health'] in ['yellow', 'red']:
index_stats = {
**health_output[0],
'settings': settings,
'stats': stats_output['_all']['total'],
'tasks': tasks_output['nodes']
}
all_indices_stats.append(index_stats)
# Fetches all indices health; skips empty lines and system indices
health_url = "/_cat/indices?v&h=index,status&format=json"
health_response = handle.web_request(health_url, "GET", None)
index_stats = [idx for idx in health_response if not idx['index'].startswith('.')] if health_response and 'error' not in health_response else []

if not index_stats:
print(f"No indices found or error retrieving indices: {health_response.get('error', 'No response') if health_response else 'No data'}")
return (True, None)

all_indices_stats = [
{"index": idx['index'], "status": idx['status']}
for idx in index_stats
]

except Exception as e:
raise e
if len(all_indices_stats)!=0:
return (False, all_indices_stats)
return (True, None)
print(f"Error processing index health: {str(e)}")
return (False, [])

return (False, all_indices_stats) if all_indices_stats else (True, None)
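A hedged, standalone sketch of the new `_cat/indices` JSON handling above: system indices (names starting with a dot) are filtered out and only index name and status are kept. The sample response is made up.

health_response = [
    {"index": ".kibana_1", "status": "open"},       # system index, skipped
    {"index": "logs-2024.05.29", "status": "open"},
    {"index": "orders", "status": "close"},
]

index_stats = [idx for idx in health_response if not idx["index"].startswith(".")]
all_indices_stats = [{"index": idx["index"], "status": idx["status"]} for idx in index_stats]
print(all_indices_stats)
# [{'index': 'logs-2024.05.29', 'status': 'open'}, {'index': 'orders', 'status': 'close'}]
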

1 change: 1 addition & 0 deletions Github/legos/github_create_team/github_create_team.py
@@ -74,6 +74,7 @@ def github_create_team(
team_details = {}
repo_names =[]
list_of_repos = ''
privacy_settings = ''
if privacy is None or len(privacy)==0:
privacy_settings = "secret"
organization = handle.get_organization(organization_name)
1 change: 1 addition & 0 deletions Github/legos/github_delete_branch/github_delete_branch.py
@@ -43,6 +43,7 @@ def github_delete_branch(handle, owner:str, repository: str, branch_name: str)->
:rtype: Deleted branch info
"""
flag_to_check_branch = 0
try:
user = handle.get_user(login=owner)
repo_name = user.login+"/"+repository
@@ -35,6 +35,7 @@ def github_remove_member_from_org(handle, organization_name:str, username:str)->
:rtype: List of return status of removing a member from Org
"""
result = ""
organization = handle.get_organization(organization_name)
try:
user = handle.get_user(username)
@@ -7,11 +7,17 @@
from typing import Tuple, Optional
from pydantic import BaseModel, Field
from croniter import croniter
from datetime import datetime, timezone, timedelta
import json


class InputSchema(BaseModel):
namespace: Optional[str] = Field(..., description='k8s Namespace', title='Namespace')
time_interval_to_check: int = Field(
24,
description='Time interval in hours. This time window is used to check if pod in a cronjob was in Pending state. Default is 24 hours.',
title="Time Interval"
)


def k8s_check_cronjob_pod_status_printer(output):
@@ -20,10 +26,12 @@ def k8s_check_cronjob_pod_status_printer(output):
print("CronJobs are running as expected.")
else:
for issue in issues:
print(f"CronJob '{issue['cronjob_name']}' Alert: {issue['message']}")
print(f"CronJob '{issue['cronjob_name']}' in namespace '{issue['namespace']}' has issues.")

def format_datetime(dt):
return dt.strftime("%Y-%m-%d %H:%M:%S %Z")

def k8s_check_cronjob_pod_status(handle, namespace: str='') -> Tuple:
def k8s_check_cronjob_pod_status(handle, namespace: str='', time_interval_to_check=24) -> Tuple:
"""
Checks the status of the CronJob pods.
@@ -39,7 +47,10 @@ def k8s_check_cronjob_pod_status(handle, namespace: str='') -> Tuple:
batch_v1 = client.BatchV1Api(api_client=handle)
core_v1 = client.CoreV1Api(api_client=handle)

issues = {"NotAssociated": [], "Pending": [], "UnexpectedState": []}
issues = []
current_time = datetime.now(timezone.utc)
interval_time_to_check = current_time - timedelta(hours=time_interval_to_check)
interval_time_to_check = interval_time_to_check.replace(tzinfo=timezone.utc)

# Get namespaces to check
if namespace:
@@ -68,34 +79,35 @@
continue
cronjob = json.loads(response.stdout)

schedule = cronjob['spec']['schedule']

# Calculate the next expected run
now = datetime.now(timezone.utc)
iter = croniter(schedule, now)
next_run = iter.get_next(datetime)
time_to_next_run = next_run - now

# Fetch the most recent Job associated with the CronJob
jobs = batch_v1.list_namespaced_job(ns) # Fetch all jobs, and then filter by prefix.

associated_jobs = [job for job in jobs.items if job.metadata.name.startswith(cronjob['metadata']['name'])]
if not associated_jobs:
# If no associated jobs, that means the job is not scheduled.
return (True, None)
continue

latest_job = sorted(associated_jobs, key=lambda x: x.status.start_time, reverse=True)[0]

# Check job's pods for any issues
pods = core_v1.list_namespaced_pod(ns, label_selector=f"job-name={latest_job.metadata.name}")

for pod in pods.items:
if pod.status.phase == 'Pending' and now - pod.status.start_time > time_to_next_run:
issues["Pending"].append({"pod_name": pod.metadata.name, "namespace": ns})
elif pod.status.phase not in ['Running', 'Succeeded']:
issues["UnexpectedState"].append({"pod_name": pod.metadata.name, "namespace": ns, "state": pod.status.phase})

if all(not val for val in issues.values()):
return (True, None)
else:
return (False, issues)
if pod.status.phase == 'Pending':
start_time = pod.status.start_time
if start_time and start_time >= interval_time_to_check:
issues.append({
"cronjob_name": cronjob_name,
"namespace": ns,
"pod_name": pod.metadata.name,
"start_time": format_datetime(start_time)
})
break
elif pod.status.phase not in ['Running', 'Succeeded','Completed']:
issues.append({
"cronjob_name": cronjob_name,
"namespace": ns,
"pod_name": pod.metadata.name,
"state": pod.status.phase
})
break

return (not issues, issues if issues else None)
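A hedged, standalone sketch of the new look-back window: a Pending pod is only reported if it started within the last `time_interval_to_check` hours. The timestamps here are illustrative and assumed timezone-aware, as the Kubernetes API returns them.

from datetime import datetime, timezone, timedelta

def pending_within_window(start_time, time_interval_to_check: int = 24) -> bool:
    # Compute the cutoff once; anything that started before it is ignored.
    current_time = datetime.now(timezone.utc)
    cutoff = current_time - timedelta(hours=time_interval_to_check)
    return start_time is not None and start_time >= cutoff

recent = datetime.now(timezone.utc) - timedelta(hours=2)
stale = datetime.now(timezone.utc) - timedelta(hours=48)
print(pending_within_window(recent))  # True  -> reported as an issue
print(pending_within_window(stale))   # False -> outside the 24h window, ignored
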
@@ -48,18 +48,25 @@ def k8s_check_worker_cpu_utilization(handle, threshold: float=70.0) -> Tuple:
if response is None or response.stderr:
raise Exception(f"Error while executing command ({kubectl_command}): {response.stderr if response else 'empty response'}")

lines = response.stdout.split('\n')
# Ensure response.stdout is processed only once and correctly
lines = response.stdout.strip().split('\n')
seen_nodes = set() # Keep track of nodes that have already been processed

for line in lines:
parts = line.split()
if len(parts) < 5: # Check for correct line format
continue
node_name, cpu_percentage_str = parts[0], parts[2]
cpu_percentage = float(cpu_percentage_str.rstrip('%'))
node_name, cpu_percentage_str = parts[0], parts[2].rstrip('%')
if node_name in seen_nodes:
print(f"Duplicate entry detected for node {node_name}, skipping.")
continue
seen_nodes.add(node_name)

cpu_percentage = float(cpu_percentage_str)
if cpu_percentage > threshold:
exceeding_nodes.append({"node": node_name, "cpu": cpu_percentage})

if len(exceeding_nodes) != 0:
if exceeding_nodes:
return (False, exceeding_nodes)
return (True, None)

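A hedged, standalone sketch of the per-node CPU parsing and de-duplication above; the `kubectl top nodes` table is made up, and the header row is skipped here for simplicity.

stdout = (
    "NAME    CPU(cores) CPU% MEMORY(bytes) MEMORY%\n"
    "node-a  1800m      80%  4096Mi        55%\n"
    "node-a  1800m      80%  4096Mi        55%\n"   # duplicate entry, skipped
    "node-b  300m       20%  2048Mi        30%\n"
)

threshold = 70.0
exceeding_nodes, seen_nodes = [], set()
for line in stdout.strip().split("\n")[1:]:  # skip the header row
    parts = line.split()
    if len(parts) < 5:
        continue
    node_name, cpu_percentage = parts[0], float(parts[2].rstrip("%"))
    if node_name in seen_nodes:
        continue  # a node already processed is not counted twice
    seen_nodes.add(node_name)
    if cpu_percentage > threshold:
        exceeding_nodes.append({"node": node_name, "cpu": cpu_percentage})

print(exceeding_nodes)  # [{'node': 'node-a', 'cpu': 80.0}]
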
@@ -40,7 +40,7 @@ def k8s_get_error_pods_from_all_jobs(handle, namespace:str="") -> Tuple:

if response.stderr:
raise Exception(f"Error occurred while executing command {kubectl_cmd}: {response.stderr}")

jobs = {}
try:
if response.stdout:
jobs = json.loads(response.stdout)
@@ -57,6 +57,7 @@ def k8s_get_error_pods_from_all_jobs(handle, namespace:str="") -> Tuple:
if pod_response.stderr:
print(f"Error occurred while fetching pods for job {job_name}: {pod_response.stderr}")
continue
pods = {}
try:
if response.stdout:
pods = json.loads(pod_response.stdout)
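A hedged, minimal sketch of the defensive pattern these two additions follow: `jobs` and `pods` are pre-initialized to empty dicts so a missing or malformed kubectl response cannot leave them undefined. The JSON payload and exception handling are illustrative.

import json

raw_stdout = '{"items": []}'   # stand-in for a kubectl -o json response

jobs = {}
try:
    if raw_stdout:
        jobs = json.loads(raw_stdout)
except json.JSONDecodeError:
    pass  # jobs stays {} and the caller can still iterate jobs.get("items", [])

print(jobs.get("items", []))  # []
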
@@ -59,7 +59,7 @@ def k8s_get_pending_pods(handle, namespace:str="") -> Tuple:
pod_namespace = pod['metadata']['namespace']

if status == 'Pending':
pending_pods.append([name, pod_namespace])
pending_pods.append({"pod_name":name,"namespace":pod_namespace})

if len(pending_pods) != 0:
return (False, pending_pods)
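A small hedged example of the result shape after this change: each pending pod is now reported as a dict with named keys instead of a bare two-element list. The values are illustrative.

pending_pods = []
name, pod_namespace = "api-7f9c4d", "default"
pending_pods.append({"pod_name": name, "namespace": pod_namespace})
print(pending_pods)  # [{'pod_name': 'api-7f9c4d', 'namespace': 'default'}]
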