From db3e9acfa79c25fe8dbf9715274866ec41bbd75a Mon Sep 17 00:00:00 2001 From: Avi-Robusta <97387909+Avi-Robusta@users.noreply.github.com> Date: Mon, 17 Jun 2024 11:00:15 +0300 Subject: [PATCH] added filter on alert by label (#45) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Example run ``` kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_node_selector.yaml # wait till alert fires... poetry run python3 /Users/avirobusta/git/holmesgpt/holmes.py investigate alertmanager --alertmanager-url=http://localhost:9093/ --alertmanager-label pod=user-profile-import Configuration file exists at /Users/avirobusta/Library/Preferences/pypoetry, reusing this directory. Consider moving TOML configuration files to /Users/avirobusta/Library/Application Support/pypoetry, as support for the legacy directory will be removed in an upcoming release. Analyzing all 1 issues. (Use --alertmanager-alertname to filter.) Press Ctrl+C to stop. Analyzing issue 1/1: KubePodNotReady... No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.) Running `kubectl describe Pod user-profile-import -n default` tools.py:89 ──────────────────────────────────────────────────────── AI: Node Affinity Mismatch Resource: Pod user-profile-import Details: Pod user-profile-import is pending because 0/3 nodes match its node affinity/selector criteria. 
──────────────────────────────────────────────────────── ``` --- holmes.py | 4 ++++ holmes/config.py | 2 ++ holmes/plugins/sources/prometheus/plugin.py | 19 ++++++++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/holmes.py b/holmes.py index eb11bfb7..be65fe15 100644 --- a/holmes.py +++ b/holmes.py @@ -198,6 +198,9 @@ def alertmanager( system_prompt: Optional[str] = typer.Option( "builtin://generic_investigation.jinja2", help=system_prompt_help ), + alertmanager_label: Optional[str] = typer.Option( + None, help="For filtering alerts with a specific label must be of format key=value" + ), ): """ Investigate a Prometheus/Alertmanager alert @@ -214,6 +217,7 @@ def alertmanager( alertmanager_username=alertmanager_username, alertmanager_password=alertmanager_password, alertmanager_alertname=alertmanager_alertname, + alertmanager_label=alertmanager_label, slack_token=slack_token, slack_channel=slack_channel, custom_toolsets=custom_toolsets, diff --git a/holmes/config.py b/holmes/config.py index 7a566d67..0092c6fe 100644 --- a/holmes/config.py +++ b/holmes/config.py @@ -47,6 +47,7 @@ class Config(RobustaBaseConfig): alertmanager_username: Optional[str] = None alertmanager_password: Optional[str] = None alertmanager_alertname: Optional[str] = None + alertmanager_label: Optional[str] = None jira_url: Optional[str] = None jira_username: Optional[str] = None @@ -255,6 +256,7 @@ def create_alertmanager_source(self) -> AlertManagerSource: username=self.alertmanager_username, password=self.alertmanager_password, alertname=self.alertmanager_alertname, + label=self.alertmanager_label, ) def create_slack_destination(self): diff --git a/holmes/plugins/sources/prometheus/plugin.py b/holmes/plugins/sources/prometheus/plugin.py index 43bb37b5..8bda3a55 100644 --- a/holmes/plugins/sources/prometheus/plugin.py +++ b/holmes/plugins/sources/prometheus/plugin.py @@ -25,12 +25,14 @@ def __init__( username: Optional[str] = None, password: Optional[str] = None, 
alertname: Optional[Pattern] = None, + label: Optional[str] = None ): super().__init__() self.url = url self.username = username self.password = password self.alertname = alertname + self.label = label def fetch_issues(self) -> List[Issue]: fetch_alerts_url = f"{self.url}/api/v2/alerts" @@ -55,9 +57,13 @@ def fetch_issues(self) -> List[Issue]: a.to_regular_prometheus_alert() for a in parse_obj_as(List[PrometheusGettableAlert], data) ] + + alerts = self.label_filter_issues(alerts) + if self.alertname is not None: alertname_filter = re.compile(self.alertname) - alerts = [a for a in alerts if alertname_filter.match(a.unique_id)] + alerts = [a for a in alerts if alertname_filter.match(a.unique_id)] + return [ Issue( @@ -73,6 +79,17 @@ def fetch_issues(self) -> List[Issue]: for alert in alerts ] + def label_filter_issues(self, issues: List[PrometheusAlert]) -> List[PrometheusAlert]: + if not self.label: + return issues + + label_parts = self.label.split('=') + if len(label_parts) != 2: + raise Exception(f"The label {self.label} is of the wrong format use '--alertmanager-label key=value'") + + alert_label_key, alert_label_value = label_parts + return [issue for issue in issues if issue.labels.get(alert_label_key, None) == alert_label_value] + @staticmethod def __format_issue_metadata(alert: PrometheusAlert) -> Optional[str]: if not alert.labels and not alert.annotations: