From 1515b3d96f50c2b116236739fd157af893a69f2f Mon Sep 17 00:00:00 2001
From: moshemorad <moshe@trustmi.ai>
Date: Tue, 12 Nov 2024 11:15:52 +0200
Subject: [PATCH] Add columns to csv (#359)

---
 .gitignore                             |   2 +-
 robusta_krr/core/models/config.py      |   5 +-
 robusta_krr/core/models/result.py      |   5 +-
 robusta_krr/formatters/csv.py          |  87 +++++---
 robusta_krr/main.py                    |  32 ++-
 tests/formatters/test_csv_formatter.py | 291 +++++++++++++++++++++++++
 tests/test_krr.py                      |   2 +-
 tests/test_runner.py                   |  21 ++
 8 files changed, 405 insertions(+), 40 deletions(-)
 create mode 100644 tests/formatters/test_csv_formatter.py
 create mode 100644 tests/test_runner.py

diff --git a/.gitignore b/.gitignore
index 4a103fe0..cec2b2c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -133,4 +133,4 @@ dmypy.json
 .DS_Store
 robusta_lib
 .idea
-.vscode
\ No newline at end of file
+.vscode
diff --git a/robusta_krr/core/models/config.py b/robusta_krr/core/models/config.py
index d7c92976..32241ed1 100644
--- a/robusta_krr/core/models/config.py
+++ b/robusta_krr/core/models/config.py
@@ -58,10 +58,11 @@ class Config(pd.BaseSettings):
     strategy: str
     log_to_stderr: bool
     width: Optional[int] = pd.Field(None, ge=1)
+    show_severity: bool = True
 
     # Output Settings
     file_output: Optional[str] = pd.Field(None)
-    file_output_dynamic = bool = pd.Field(False)
+    file_output_dynamic: bool = pd.Field(False)
     slack_output: Optional[str] = pd.Field(None)
 
     other_args: dict[str, Any]
@@ -105,7 +106,7 @@ def validate_namespaces(cls, v: Union[list[str], Literal["*"]]) -> Union[list[st
             for val in v:
                 if val.startswith("*"):
                     raise ValueError("Namespace's values cannot start with an asterisk (*)")
-            
+
         return [val.lower() for val in v]
 
     @pd.validator("resources", pre=True)
diff --git a/robusta_krr/core/models/result.py b/robusta_krr/core/models/result.py
index 514590f9..827f8690 100644
--- a/robusta_krr/core/models/result.py
+++ b/robusta_krr/core/models/result.py
@@ -17,8 +17,8 @@ class Recommendation(pd.BaseModel):
 
 
 class ResourceRecommendation(pd.BaseModel):
-    requests: dict[ResourceType, RecommendationValue]
-    limits: dict[ResourceType, RecommendationValue]
+    requests: dict[ResourceType, Union[RecommendationValue, Recommendation]]
+    limits: dict[ResourceType, Union[RecommendationValue, Recommendation]]
     info: dict[ResourceType, Optional[str]]
 
 
@@ -40,6 +40,7 @@ def calculate(cls, object: K8sObjectData, recommendation: ResourceAllocations) -
 
                 current_severity = Severity.calculate(current, recommended, resource_type)
 
+                #TODO: consider... changing field after model created doesn't validate it.
                 getattr(recommendation_processed, selector)[resource_type] = Recommendation(
                     value=recommended, severity=current_severity
                 )
diff --git a/robusta_krr/formatters/csv.py b/robusta_krr/formatters/csv.py
index 812c35bd..d32f5f94 100644
--- a/robusta_krr/formatters/csv.py
+++ b/robusta_krr/formatters/csv.py
@@ -1,14 +1,31 @@
-import itertools
 import csv
-import logging
 import io
+import itertools
+import logging
+from typing import Any
 
 from robusta_krr.core.abstract import formatters
-from robusta_krr.core.models.allocations import RecommendationValue, format_recommendation_value, format_diff, NONE_LITERAL, NAN_LITERAL
+from robusta_krr.core.models.allocations import NONE_LITERAL, format_diff, format_recommendation_value
+from robusta_krr.core.models.config import settings
 from robusta_krr.core.models.result import ResourceScan, ResourceType, Result
 
 logger = logging.getLogger("krr")
 
+
+NAMESPACE_HEADER = "Namespace"
+NAME_HEADER = "Name"
+PODS_HEADER = "Pods"
+OLD_PODS_HEADER = "Old Pods"
+TYPE_HEADER = "Type"
+CONTAINER_HEADER = "Container"
+CLUSTER_HEADER = "Cluster"
+SEVERITY_HEADER = "Severity"
+
+RESOURCE_DIFF_HEADER = "{resource_name} Diff"
+RESOURCE_REQUESTS_HEADER = "{resource_name} Requests"
+RESOURCE_LIMITS_HEADER = "{resource_name} Limits"
+
+
 def _format_request_str(item: ResourceScan, resource: ResourceType, selector: str) -> str:
     allocated = getattr(item.object.allocations, selector)[resource]
     recommended = getattr(item.recommended, selector)[resource]
@@ -20,12 +37,8 @@ def _format_request_str(item: ResourceScan, resource: ResourceType, selector: st
     if diff != "":
         diff = f"({diff}) "
 
-    return (
-        diff
-        + format_recommendation_value(allocated)
-        + " -> "
-        + format_recommendation_value(recommended.value)
-    )
+    return diff + format_recommendation_value(allocated) + " -> " + format_recommendation_value(recommended.value)
+
 
 def _format_total_diff(item: ResourceScan, resource: ResourceType, pods_current: int) -> str:
     selector = "requests"
@@ -34,43 +47,57 @@ def _format_total_diff(item: ResourceScan, resource: ResourceType, pods_current:
 
     return format_diff(allocated, recommended, selector, pods_current)
 
+
 @formatters.register("csv")
 def csv_exporter(result: Result) -> str:
     # We need to order the resource columns so that they are in the format of Namespace,Name,Pods,Old Pods,Type,Container,CPU Diff,CPU Requests,CPU Limits,Memory Diff,Memory Requests,Memory Limits
-    resource_columns = []
+    csv_columns = ["Namespace", "Name", "Pods", "Old Pods", "Type", "Container"]
+
+    if settings.show_cluster_name:
+        csv_columns.insert(0, "Cluster")
+
+    if settings.show_severity:
+        csv_columns.append("Severity")
+
     for resource in ResourceType:
-        resource_columns.append(f"{resource.name} Diff")
-        resource_columns.append(f"{resource.name} Requests")
-        resource_columns.append(f"{resource.name} Limits")
+        csv_columns.append(RESOURCE_DIFF_HEADER.format(resource_name=resource.name))
+        csv_columns.append(RESOURCE_REQUESTS_HEADER.format(resource_name=resource.name))
+        csv_columns.append(RESOURCE_LIMITS_HEADER.format(resource_name=resource.name))
 
     output = io.StringIO()
-    csv_writer = csv.writer(output)
-    csv_writer.writerow([
-        "Namespace", "Name", "Pods", "Old Pods", "Type", "Container",
-        *resource_columns
-    ])
+    csv_writer = csv.DictWriter(output, csv_columns, extrasaction="ignore")
+    csv_writer.writeheader()
 
     for _, group in itertools.groupby(
         enumerate(result.scans), key=lambda x: (x[1].object.cluster, x[1].object.namespace, x[1].object.name)
     ):
         group_items = list(group)
 
-        for j, (i, item) in enumerate(group_items):
+        for j, (_, item) in enumerate(group_items):
             full_info_row = j == 0
 
-            row = [
-                item.object.namespace if full_info_row else "",
-                item.object.name if full_info_row else "",
-                f"{item.object.current_pods_count}" if full_info_row else "",
-                f"{item.object.deleted_pods_count}" if full_info_row else "",
-                item.object.kind if full_info_row else "",
-                item.object.container,
-            ]
+            row: dict[str, Any] = {
+                NAMESPACE_HEADER: item.object.namespace if full_info_row else "",
+                NAME_HEADER: item.object.name if full_info_row else "",
+                PODS_HEADER: f"{item.object.current_pods_count}" if full_info_row else "",
+                OLD_PODS_HEADER: f"{item.object.deleted_pods_count}" if full_info_row else "",
+                TYPE_HEADER: item.object.kind if full_info_row else "",
+                CONTAINER_HEADER: item.object.container,
+                SEVERITY_HEADER: item.severity,
+                CLUSTER_HEADER: item.object.cluster,
+            }
 
             for resource in ResourceType:
-                row.append(_format_total_diff(item, resource, item.object.current_pods_count))
-                row += [_format_request_str(item, resource, selector) for selector in ["requests", "limits"]]
+                row[RESOURCE_DIFF_HEADER.format(resource_name=resource.name)] = _format_total_diff(
+                    item, resource, item.object.current_pods_count
+                )
+                row[RESOURCE_REQUESTS_HEADER.format(resource_name=resource.name)] = _format_request_str(
+                    item, resource, "requests"
+                )
+                row[RESOURCE_LIMITS_HEADER.format(resource_name=resource.name)] = _format_request_str(
+                    item, resource, "limits"
+                )
 
             csv_writer.writerow(row)
-             
+
     return output.getvalue()
diff --git a/robusta_krr/main.py b/robusta_krr/main.py
index dd9ee03b..f82224b8 100644
--- a/robusta_krr/main.py
+++ b/robusta_krr/main.py
@@ -7,6 +7,7 @@
 from typing import List, Optional
 from uuid import UUID
 
+import click
 import typer
 import urllib3
 from pydantic import ValidationError  # noqa: F401
@@ -19,7 +20,12 @@
 from robusta_krr.core.runner import Runner
 from robusta_krr.utils.version import get_version
 
-app = typer.Typer(pretty_exceptions_show_locals=False, pretty_exceptions_short=True, no_args_is_help=True, help="IMPORTANT: Run `krr simple --help` to see all cli flags!")
+app = typer.Typer(
+    pretty_exceptions_show_locals=False,
+    pretty_exceptions_short=True,
+    no_args_is_help=True,
+    help="IMPORTANT: Run `krr simple --help` to see all cli flags!",
+)
 
 # NOTE: Disable insecure request warnings, as it might be expected to use self-signed certificates inside the cluster
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -216,7 +222,16 @@ def run_strategy(
                     rich_help_panel="Logging Settings",
                 ),
                 show_cluster_name: bool = typer.Option(
-                    False, "--show-cluster-name", help="In table output, always show the cluster name even for a single cluster", rich_help_panel="Output Settings"
+                    False,
+                    "--show-cluster-name",
+                    help="In table output, always show the cluster name even for a single cluster",
+                    rich_help_panel="Output Settings",
+                ),
+                show_severity: bool = typer.Option(
+                    True,
+                    " /--exclude-severity",
+                    help="Whether to include the severity in the output or not",
+                    rich_help_panel="Output Settings",
                 ),
                 verbose: bool = typer.Option(
                     False, "--verbose", "-v", help="Enable verbose mode", rich_help_panel="Logging Settings"
@@ -234,10 +249,16 @@ def run_strategy(
                     rich_help_panel="Logging Settings",
                 ),
                 file_output: Optional[str] = typer.Option(
-                    None, "--fileoutput", help="Filename to write output to (if not specified, file output is disabled)", rich_help_panel="Output Settings"
+                    None,
+                    "--fileoutput",
+                    help="Filename to write output to (if not specified, file output is disabled)",
+                    rich_help_panel="Output Settings",
                 ),
                 file_output_dynamic: bool = typer.Option(
-                    False, "--fileoutput-dynamic", help="Ignore --fileoutput and write files to the current directory in the format krr-{datetime}.{format} (e.g. krr-20240518223924.csv)", rich_help_panel="Output Settings"
+                    False,
+                    "--fileoutput-dynamic",
+                    help="Ignore --fileoutput and write files to the current directory in the format krr-{datetime}.{format} (e.g. krr-20240518223924.csv)",
+                    rich_help_panel="Output Settings",
                 ),
                 slack_output: Optional[str] = typer.Option(
                     None,
@@ -248,6 +269,8 @@ def run_strategy(
                 **strategy_args,
             ) -> None:
                 f"""Run KRR using the `{_strategy_name}` strategy"""
+                if not show_severity and format != "csv":
+                    raise click.BadOptionUsage("--exclude-severity", "--exclude-severity works only with format=csv")
 
                 try:
                     config = Config(
@@ -284,6 +307,7 @@ def run_strategy(
                         file_output=file_output,
                         file_output_dynamic=file_output_dynamic,
                         slack_output=slack_output,
+                        show_severity=show_severity,
                         strategy=_strategy_name,
                         other_args=strategy_args,
                     )
diff --git a/tests/formatters/test_csv_formatter.py b/tests/formatters/test_csv_formatter.py
new file mode 100644
index 00000000..150b2aa5
--- /dev/null
+++ b/tests/formatters/test_csv_formatter.py
@@ -0,0 +1,291 @@
+import csv
+import io
+import json
+from typing import Any
+
+import pytest
+
+from robusta_krr.core.models.config import Config
+from robusta_krr.core.models.result import Result
+from robusta_krr.formatters.csv import csv_exporter
+
+RESULT = """
+{
+    "scans": [
+        {
+            "object": {
+                "cluster": "mock-cluster",
+                "name": "mock-object-1",
+                "container": "mock-container-1",
+                "pods": [
+                    {
+                        "name": "mock-pod-1",
+                        "deleted": false
+                    },
+                    {
+                        "name": "mock-pod-2",
+                        "deleted": false
+                    },
+                    {
+                        "name": "mock-pod-3",
+                        "deleted": true
+                    }
+                ],
+                "hpa": null,
+                "namespace": "default",
+                "kind": "Deployment",
+                "allocations": {
+                    "requests": {
+                        "cpu": "50m",
+                        "memory": "2048Mi"
+                    },
+                    "limits": {
+                        "cpu": 2.0,
+                        "memory": 2.0
+                    },
+                    "info": {}
+                },
+                "warnings": []
+            },
+            "recommended": {
+                "requests": {
+                    "cpu": {
+                        "value": 0.0065,
+                        "severity": "UNKNOWN"
+                    },
+                    "memory": {
+                        "value": 0.5,
+                        "severity": "CRITICAL"
+                    }
+                },
+                "limits": {
+                    "cpu": {
+                        "value": "?",
+                        "severity": "UNKNOWN"
+                    },
+                    "memory": {
+                        "value": 0.5,
+                        "severity": "CRITICAL"
+                    }
+                },
+                "info": {
+                    "cpu": "Not enough data",
+                    "memory": "Not enough data"
+                }
+            },
+            "severity": "CRITICAL"
+        }
+    ],
+    "score": 100,
+    "resources": [
+        "cpu",
+        "memory"
+    ],
+    "description": "tests data",
+    "strategy": {
+        "name": "simple",
+        "settings": {
+            "history_duration": 336.0,
+            "timeframe_duration": 1.25,
+            "cpu_percentile": 95.0,
+            "memory_buffer_percentage": 15.0,
+            "points_required": 100,
+            "allow_hpa": false,
+            "use_oomkill_data": false,
+            "oom_memory_buffer_percentage": 25.0
+        }
+    },
+    "errors": [],
+    "clusterSummary": {},
+    "config": {
+        "quiet": false,
+        "verbose": false,
+        "clusters": [],
+        "kubeconfig": null,
+        "impersonate_user": null,
+        "impersonate_group": null,
+        "namespaces": "*",
+        "resources": [],
+        "selector": null,
+        "cpu_min_value": 10,
+        "memory_min_value": 100,
+        "prometheus_url": null,
+        "prometheus_auth_header": null,
+        "prometheus_other_headers": {},
+        "prometheus_ssl_enabled": false,
+        "prometheus_cluster_label": null,
+        "prometheus_label": null,
+        "eks_managed_prom": false,
+        "eks_managed_prom_profile_name": null,
+        "eks_access_key": null,
+        "eks_secret_key": null,
+        "eks_service_name": "aps",
+        "eks_managed_prom_region": null,
+        "coralogix_token": null,
+        "openshift": false,
+        "max_workers": 10,
+        "format": "csv",
+        "show_cluster_name": false,
+        "strategy": "simple",
+        "log_to_stderr": false,
+        "width": null,
+        "file_output": null,
+        "slack_output": null,
+        "other_args": {
+            "history_duration": "336",
+            "timeframe_duration": "1.25",
+            "cpu_percentile": "95",
+            "memory_buffer_percentage": "15",
+            "points_required": "100",
+            "allow_hpa": false,
+            "use_oomkill_data": false,
+            "oom_memory_buffer_percentage": "25"
+        },
+        "inside_cluster": false,
+        "file_output_dynamic": false
+    }
+}
+"""
+
+
+def _load_result(override_config: dict[str, Any]) -> Result:
+    res_data = json.loads(RESULT)
+    res_data["config"].update(override_config)
+    result = Result(**res_data)
+    Config.set_config(result.config)
+    return result
+
+
+@pytest.mark.parametrize(
+    "override_config, expected_headers",
+    [
+        (
+            {},
+            [
+                "Namespace",
+                "Name",
+                "Pods",
+                "Old Pods",
+                "Type",
+                "Container",
+                "Severity",
+                "CPU Diff",
+                "CPU Requests",
+                "CPU Limits",
+                "Memory Diff",
+                "Memory Requests",
+                "Memory Limits",
+            ],
+        ),
+        (
+            {"show_severity": False},
+            [
+                "Namespace",
+                "Name",
+                "Pods",
+                "Old Pods",
+                "Type",
+                "Container",
+                "CPU Diff",
+                "CPU Requests",
+                "CPU Limits",
+                "Memory Diff",
+                "Memory Requests",
+                "Memory Limits",
+            ],
+        ),
+        (
+            {"show_cluster_name": True},
+            [
+                "Cluster",
+                "Namespace",
+                "Name",
+                "Pods",
+                "Old Pods",
+                "Type",
+                "Container",
+                "Severity",
+                "CPU Diff",
+                "CPU Requests",
+                "CPU Limits",
+                "Memory Diff",
+                "Memory Requests",
+                "Memory Limits",
+            ],
+        ),
+    ],
+)
+def test_csv_headers(override_config: dict[str, Any], expected_headers: list[str]) -> None:
+    result = _load_result(override_config=override_config)
+    output = csv_exporter(result)
+    reader = csv.DictReader(io.StringIO(output))
+
+    assert reader.fieldnames == expected_headers
+
+
+@pytest.mark.parametrize(
+    "override_config, expected_first_row",
+    [
+        (
+            {},
+            {
+                "Namespace": "default",
+                "Name": "mock-object-1",
+                "Pods": "2",
+                "Old Pods": "1",
+                "Type": "Deployment",
+                "Container": "mock-container-1",
+                'Severity': 'CRITICAL',
+                "CPU Diff": "-87m",
+                "CPU Requests": "(-43m) 50m -> 6m",
+                "CPU Limits": "2.0 -> ?",
+                "Memory Diff": "-4096Mi",
+                "Memory Requests": "(-2048Mi) 2048Mi -> 500m",
+                "Memory Limits": "2.0 -> 500m",
+            },
+        ),
+        (
+            {"show_severity": False},
+            {
+                "Namespace": "default",
+                "Name": "mock-object-1",
+                "Pods": "2",
+                "Old Pods": "1",
+                "Type": "Deployment",
+                "Container": "mock-container-1",
+                "CPU Diff": "-87m",
+                "CPU Requests": "(-43m) 50m -> 6m",
+                "CPU Limits": "2.0 -> ?",
+                "Memory Diff": "-4096Mi",
+                "Memory Requests": "(-2048Mi) 2048Mi -> 500m",
+                "Memory Limits": "2.0 -> 500m",
+            },
+        ),
+        (
+            {"show_cluster_name": True},
+            {
+                "Cluster": "mock-cluster",
+                "Namespace": "default",
+                "Name": "mock-object-1",
+                "Pods": "2",
+                "Old Pods": "1",
+                "Type": "Deployment",
+                "Container": "mock-container-1",
+                'Severity': 'CRITICAL',
+                "CPU Diff": "-87m",
+                "CPU Requests": "(-43m) 50m -> 6m",
+                "CPU Limits": "2.0 -> ?",
+                "Memory Diff": "-4096Mi",
+                "Memory Requests": "(-2048Mi) 2048Mi -> 500m",
+                "Memory Limits": "2.0 -> 500m",
+            },
+        ),
+    ],
+)
+def test_csv_row_value(override_config: dict[str, Any], expected_first_row: list[str]) -> None:
+    result = _load_result(override_config=override_config)
+    output = csv_exporter(result)
+    reader = csv.DictReader(io.StringIO(output))
+
+    first_row: dict[str, Any] = next(reader)
+    assert first_row == expected_first_row
diff --git a/tests/test_krr.py b/tests/test_krr.py
index d4c6d6a9..90ea2af5 100644
--- a/tests/test_krr.py
+++ b/tests/test_krr.py
@@ -30,7 +30,7 @@ def test_run(log_flag: str):
         raise e from result.exception
 
 
-@pytest.mark.parametrize("format", ["json", "yaml", "table", "pprint"])
+@pytest.mark.parametrize("format", ["json", "yaml", "table", "pprint", "csv"])
 @pytest.mark.parametrize("output", ["--logtostderr", "-q"])
 def test_output_formats(format: str, output: str):
     result = runner.invoke(app, [STRATEGY_NAME, output, "-f", format])
diff --git a/tests/test_runner.py b/tests/test_runner.py
new file mode 100644
index 00000000..dcf4e19b
--- /dev/null
+++ b/tests/test_runner.py
@@ -0,0 +1,21 @@
+import pytest
+from click.testing import Result
+from typer.testing import CliRunner
+
+from robusta_krr.main import app, load_commands
+
+runner = CliRunner(mix_stderr=False)
+load_commands()
+
+
+@pytest.mark.parametrize(
+    "args, expected_exit_code",
+    [
+        (["--exclude-severity", "-f", "csv"], 0),
+        (["--exclude-severity", "-f", "table"], 2),
+        (["--exclude-severity"], 2),
+    ],
+)
+def test_exclude_severity_option(args: list[str], expected_exit_code: int) -> None:
+    result: Result = runner.invoke(app, ["simple", *args])
+    assert result.exit_code == expected_exit_code