From 498d5f6f1f5a9910bd20574161e5719616e5a07a Mon Sep 17 00:00:00 2001 From: arik Date: Mon, 27 May 2024 15:02:39 +0300 Subject: [PATCH] increase krr default memory request and limit to 2Gi, to prevent oom kills when scanning big clusters (#1441) --- docs/playbook-reference/actions/scans.rst | 6 +++--- playbooks/robusta_playbooks/krr.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/playbook-reference/actions/scans.rst b/docs/playbook-reference/actions/scans.rst index 4ea65fd8b..edee64b99 100644 --- a/docs/playbook-reference/actions/scans.rst +++ b/docs/playbook-reference/actions/scans.rst @@ -126,11 +126,11 @@ To prevent the KRR job from OOMKill (Out of Memory), you can configure the memor runner: additional_env_vars: - name: KRR_MEMORY_REQUEST - value: "2Gi" + value: "3Gi" - name: KRR_MEMORY_LIMIT - value: "2Gi" + value: "3Gi" -By default, the memory request and limit are set to ``1Gi``. Modify these values according to your requirements. +By default, the memory request and limit are set to ``2Gi``. Modify these values according to your requirements. Popeye - A Kubernetes Cluster Sanitizer ************************************************ diff --git a/playbooks/robusta_playbooks/krr.py b/playbooks/robusta_playbooks/krr.py index 3476bea7f..9a889705c 100644 --- a/playbooks/robusta_playbooks/krr.py +++ b/playbooks/robusta_playbooks/krr.py @@ -10,6 +10,7 @@ from hikaru.model.rel_1_26 import Container, EnvVar, EnvVarSource, PodSpec, ResourceRequirements, SecretKeySelector from prometrix import AWSPrometheusConfig, CoralogixPrometheusConfig, PrometheusAuthorization, PrometheusConfig from pydantic import BaseModel, ValidationError, validator + from robusta.api import ( IMAGE_REGISTRY, RUNNER_SERVICE_ACCOUNT, @@ -33,8 +34,8 @@ from robusta.integrations.prometheus.utils import generate_prometheus_config IMAGE: str = os.getenv("KRR_IMAGE_OVERRIDE", f"{IMAGE_REGISTRY}/krr:v1.9.0") -KRR_MEMORY_LIMIT: str = os.getenv("KRR_MEMORY_LIMIT", "1Gi") -KRR_MEMORY_REQUEST: str = os.getenv("KRR_MEMORY_REQUEST", "1Gi") +KRR_MEMORY_LIMIT: str = os.getenv("KRR_MEMORY_LIMIT", "2Gi") +KRR_MEMORY_REQUEST: str = os.getenv("KRR_MEMORY_REQUEST", "2Gi") SeverityType = Literal["CRITICAL", "WARNING", "OK", "GOOD", "UNKNOWN"]