Skip to content

Commit

Permalink
Adding cos agent relation (#127)
Browse files Browse the repository at this point in the history
* init cos-agent relation

* fixing events name

* fix paths

* refactor logic and remove prom_scrape changes

* bump staticlib python version

* remove redundant method

* fix linter

* adding AlertGroupModel data class

* Adding dataclasses
  • Loading branch information
IbraAoad authored Mar 5, 2024
1 parent e62a658 commit 662559b
Show file tree
Hide file tree
Showing 9 changed files with 1,012 additions and 15 deletions.
819 changes: 819 additions & 0 deletions lib/charms/grafana_agent/v0/cos_agent.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ provides:
interface: prometheus_scrape
filebeat:
interface: elastic-beats
cos-agent:
interface: cos_agent

requires:
dashboards:
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
git+https://github.com/canonical/operator#egg=ops
git+https://github.com/canonical/charm-relation-interfaces.git@main
jinja2 < 3
markupsafe == 2.0.1
pydantic < 2
152 changes: 144 additions & 8 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, cast

import yaml
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardAggregator
from charms.nrpe_exporter.v0.nrpe_exporter import (
NrpeExporterProvider,
Expand All @@ -55,15 +57,24 @@
service_running,
service_stop,
)
from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointAggregator
from charms.prometheus_k8s.v0.prometheus_scrape import (
MetricsEndpointAggregator,
_type_convert_stored,
)
from charms.vector.v0.vector import VectorProvider
from interfaces.prometheus_scrape.v0.schema import AlertGroupModel, AlertRulesModel, ScrapeJobModel
from ops import RelationBrokenEvent, RelationChangedEvent
from ops.charm import CharmBase
from ops.framework import StoredState
from ops.main import main
from ops.model import ActiveStatus, BlockedStatus

logger = logging.getLogger(__name__)

DASHBOARDS_DIR = "./src/cos_agent/grafana_dashboards"
COS_PROXY_DASHBOARDS_DIR = "./src/grafana_dashboards"
RULES_DIR = "./src/cos_agent/prometheus_alert_rules"


class COSProxyCharm(CharmBase):
"""This class instantiates Charmed Operator libraries and sets the status of the charm.
Expand All @@ -86,6 +97,7 @@ def __init__(self, *args):
have_nrpe=False,
have_loki=False,
have_filebeat=False,
have_gagent=False,
)

self._dashboard_aggregator = GrafanaDashboardAggregator(self)
Expand All @@ -95,6 +107,11 @@ def __init__(self, *args):
self._dashboards_relation_joined,
)

self.framework.observe(
self.on.dashboards_relation_changed, # pyright: ignore
self._dashboards_relation_changed,
)

self.framework.observe(
self.on.dashboards_relation_broken, # pyright: ignore
self._dashboards_relation_broken,
Expand All @@ -111,6 +128,28 @@ def __init__(self, *args):
)

self.metrics_aggregator = MetricsEndpointAggregator(self, resolve_addresses=True)
self.cos_agent = COSAgentProvider(
self,
scrape_configs=self._get_scrape_configs,
metrics_rules_dir=RULES_DIR,
dashboard_dirs=[COS_PROXY_DASHBOARDS_DIR, DASHBOARDS_DIR],
refresh_events=[
self.on.prometheus_target_relation_changed,
self.on.prometheus_target_relation_broken,
self.on.dashboards_relation_changed,
self.on.dashboards_relation_broken,
],
)

self.framework.observe(
self.on.cos_agent_relation_joined, # pyright: ignore
self._on_cos_agent_relation_joined,
)

self.framework.observe(
self.on.cos_agent_relation_broken, # pyright: ignore
self._on_cos_agent_relation_broken,
)

self.nrpe_exporter = NrpeExporterProvider(self)
self.framework.observe(
Expand Down Expand Up @@ -140,6 +179,10 @@ def __init__(self, *args):
self.on.prometheus_target_relation_joined, # pyright: ignore
self._prometheus_target_relation_joined,
)
self.framework.observe(
self.on.prometheus_target_relation_changed, # pyright: ignore
self._prometheus_target_relation_changed,
)
self.framework.observe(
self.on.prometheus_target_relation_broken, # pyright: ignore
self._prometheus_target_relation_broken,
Expand Down Expand Up @@ -173,12 +216,97 @@ def __init__(self, *args):

self._set_status()

def _on_cos_agent_relation_joined(self, _):
    """Flag the cos-agent relation as present and recompute the unit status."""
    state = self._stored
    state.have_gagent = True
    self._set_status()

def _on_cos_agent_relation_broken(self, _):
    """Flag the cos-agent relation as gone and recompute the unit status."""
    state = self._stored
    state.have_gagent = False
    self._set_status()

def _delete_existing_dashboard_files(self, dashboards_dir: str):
    """Remove every ``request_*.json`` file found directly inside ``dashboards_dir``.

    Args:
        dashboards_dir: directory holding the generated dashboard files. A
            directory that does not exist is silently ignored.
    """
    target = Path(dashboards_dir)
    if not target.exists():
        return
    for stale_file in target.glob("request_*.json"):
        stale_file.unlink()

def _create_dashboard_files(self, dashboards_dir: str):
    """Mirror the dashboards held in this unit's relation data into ``dashboards_dir``.

    Every ``request_*`` key found in this unit's databag, on each relation of
    the dashboard aggregator's target endpoint, is decoded as JSON and its
    ``"dashboard"`` entry is written to ``<dashboards_dir>/<key>.json``.
    Afterwards any ``request_*.json`` file that was not written during this
    call is deleted, so dashboards dropped from relation data do not linger
    on disk.

    Args:
        dashboards_dir: directory that receives the files; it is created
            (including parents) when missing.

    Raises:
        json.JSONDecodeError: if a ``request_*`` value is not valid JSON.
        KeyError: if a decoded value has no ``"dashboard"`` entry.
    """
    dashboards_rel = self._dashboard_aggregator._target_relation

    directory = Path(dashboards_dir)
    directory.mkdir(parents=True, exist_ok=True)

    written = set()
    for relation in self.model.relations[dashboards_rel]:
        unit_data = relation.data[self.unit]
        for key in unit_data.keys():
            if not key.startswith("request_"):
                continue
            dashboard = json.loads(unit_data[key])["dashboard"]  # type: ignore
            # The key already carries the "request_" prefix, so it is used
            # verbatim as the file stem instead of being prefixed a second time.
            dashboard_file_path = directory / f"{key}.json"
            with open(dashboard_file_path, "w", encoding="utf-8") as dashboard_file:
                json.dump(dashboard, dashboard_file, indent=4)
            written.add(dashboard_file_path)

    # Drop generated files whose dashboard is no longer present in relation
    # data. Materialise the listing first so the directory is not modified
    # while it is being scanned.
    for existing in list(directory.glob("request_*.json")):
        if existing not in written:
            existing.unlink()

def _get_scrape_configs(self):
    """Build the list of scrape jobs known to the metrics aggregator.

    The jobs kept in the aggregator's stored state come first, followed by one
    static job per target relation that exposes targets and has a remote app.
    Each job is passed through ``ScrapeJobModel`` and exported with ``.dict()``.
    """
    aggregator = self.metrics_aggregator
    persisted = _type_convert_stored(aggregator._stored.jobs)  # pyright: ignore
    scrape_jobs = [ScrapeJobModel(**entry).dict() for entry in (persisted or [])]

    for rel in self.model.relations[aggregator._target_relation]:
        rel_targets = aggregator._get_targets(rel)
        if not rel_targets or not rel.app:
            continue
        raw_job = aggregator._static_scrape_job(rel_targets, rel.app.name)
        scrape_jobs.append(ScrapeJobModel(**raw_job).dict())

    return scrape_jobs

def _get_alert_groups(self) -> AlertRulesModel:
    """Assemble all alert rule groups known to the metrics aggregator.

    Groups kept in the aggregator's stored state are added first. Then, for
    every alert-rules relation that yields rules and has a remote app, one
    group is added, named via the aggregator's ``group_name`` and holding the
    rules labelled with that app's name.

    Returns:
        An ``AlertRulesModel`` whose ``groups`` list holds every collected group.
    """
    aggregator = self.metrics_aggregator
    result = AlertRulesModel(groups=[])

    persisted = _type_convert_stored(
        aggregator._stored.alert_rules
    )  # pyright: ignore
    for entry in persisted or []:
        result.groups.append(AlertGroupModel(**entry))

    for rel in self.model.relations[aggregator._alert_rules_relation]:
        raw_rules = aggregator._get_alert_rules(rel)
        if not raw_rules or not rel.app:
            continue
        remote_name = rel.app.name
        labelled = aggregator._label_alert_rules(raw_rules, remote_name)
        result.groups.append(
            AlertGroupModel(name=aggregator.group_name(remote_name), rules=labelled)
        )

    return result

def _handle_prometheus_alert_rule_files(self, rules_dir: str, app_name: str):
    """Dump the current alert groups to ``<rules_dir>/<app_name>-rules.yaml``.

    Args:
        rules_dir: directory for the rules file; created (with parents) if missing.
        app_name: application name used as the file-name prefix.
    """
    payload = self._get_alert_groups()

    out_dir = Path(rules_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    with open(out_dir / f"{app_name}-rules.yaml", "w") as handle:
        yaml.dump(payload.dict(), handle, default_flow_style=False)

def _dashboards_relation_joined(self, _):
    """Flag the dashboards relation as present and recompute the unit status."""
    state = self._stored
    state.have_dashboards = True
    self._set_status()

def _dashboards_relation_changed(self, _):
    """Regenerate the dashboard files in ``DASHBOARDS_DIR`` from relation data."""
    target_dir = DASHBOARDS_DIR
    self._create_dashboard_files(target_dir)

def _dashboards_relation_broken(self, _):
    """Flag the dashboards relation as gone, purge generated files, refresh status."""
    state = self._stored
    state.have_dashboards = False
    # Remove the request_*.json files previously generated in DASHBOARDS_DIR.
    self._delete_existing_dashboard_files(DASHBOARDS_DIR)
    self._set_status()

def _on_install(self, _):
Expand Down Expand Up @@ -424,8 +552,12 @@ def _prometheus_target_relation_joined(self, _):
self._stored.have_targets = True
self._set_status()

def _prometheus_target_relation_broken(self, _):
def _prometheus_target_relation_changed(self, event: RelationChangedEvent):
    """Rewrite the alert-rules file named after the app that triggered the event."""
    # NOTE(review): assumes event.app is always set for this event — confirm.
    remote_app_name = event.app.name
    self._handle_prometheus_alert_rule_files(RULES_DIR, remote_app_name)

def _prometheus_target_relation_broken(self, event: RelationBrokenEvent):
    """Flag scrape targets as gone, rewrite the app's rules file, refresh status."""
    state = self._stored
    state.have_targets = False
    # NOTE(review): assumes event.app is always set for this event — confirm.
    remote_app_name = event.app.name
    self._handle_prometheus_alert_rule_files(RULES_DIR, remote_app_name)
    self._set_status()

def _downstream_prometheus_scrape_relation_joined(self, _):
Expand Down Expand Up @@ -475,10 +607,14 @@ def _on_nrpe_targets_changed(self, event: Optional[NrpeTargetsChangedEvent]):

def _set_status(self):
messages = []
if (self._stored.have_grafana and not self._stored.have_dashboards) or ( # pyright: ignore
self._stored.have_dashboards and not self._stored.have_grafana # pyright: ignore
if (
(self._stored.have_grafana or self._stored.have_gagent)
and not self._stored.have_dashboards
) or ( # pyright: ignore
self._stored.have_dashboards
and not (self._stored.have_grafana or self._stored.have_gagent) # pyright: ignore
):
messages.append("one of (Grafana|dashboard)")
messages.append("one of (Grafana|dashboard|grafana-agent)")

if (
self._stored.have_loki # pyright: ignore
Expand All @@ -489,13 +625,13 @@ def _set_status(self):
messages.append("one of (Loki|filebeat)")

if (
self._stored.have_prometheus # pyright: ignore
(self._stored.have_prometheus or self._stored.have_gagent) # pyright: ignore
and not (self._stored.have_targets or self._stored.have_nrpe) # pyright: ignore
) or (
(self._stored.have_targets or self._stored.have_nrpe) # pyright: ignore
and not self._stored.have_prometheus # pyright: ignore
and not (self._stored.have_prometheus or self._stored.have_gagent) # pyright: ignore
):
messages.append("one of (Prometheus|target|nrpe)")
messages.append("one of (Prometheus|target|nrpe|grafana-agent)")

if messages:
self.unit.status = BlockedStatus(f"Missing {', '.join(messages)} relation(s)")
Expand Down
15 changes: 15 additions & 0 deletions tests/interface/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2022 Canonical Ltd.
# See LICENSE file for licensing details.
# from unittest.mock import patch

import pytest
from charm import COSProxyCharm
from interface_tester import InterfaceTester


@pytest.fixture
def interface_tester(interface_tester: InterfaceTester):
    """Yield the interface tester configured to run against ``COSProxyCharm``."""
    tester = interface_tester
    tester.configure(charm_type=COSProxyCharm)
    yield tester
11 changes: 11 additions & 0 deletions tests/interface/test_cos_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.

from interface_tester import InterfaceTester


def test_cos_agent_v0_interface(interface_tester: InterfaceTester):
    """Run the interface tester against the ``cos_agent`` interface."""
    tester = interface_tester
    tester.configure(interface_name="cos_agent")
    tester.run()
2 changes: 1 addition & 1 deletion tests/scenario/test_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_relation(ctx, n_remote_units):
for i in range(n_remote_units)
},
)
state_in = State(leader=True, relations=[monitors], networks=[Network.default("monitors")])
state_in = State(leader=True, relations=[monitors], networks={"monitors": Network.default()})

with patch("charm.COSProxyCharm._modify_enrichment_file", new=MagicMock()) as f:
state_out = ctx.run(monitors.changed_event, state_in)
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def test_scrape_target_relation_without_downstream_prometheus_blocks(self):

self.assertEqual(
self.harness.model.unit.status,
BlockedStatus("Missing one of (Prometheus|target|nrpe) relation(s)"),
BlockedStatus("Missing one of (Prometheus|target|nrpe|grafana-agent) relation(s)"),
)

def test_prometheus_relation_without_scrape_target_blocks(self):
Expand All @@ -174,7 +174,7 @@ def test_prometheus_relation_without_scrape_target_blocks(self):

self.assertEqual(
self.harness.model.unit.status,
BlockedStatus("Missing one of (Prometheus|target|nrpe) relation(s)"),
BlockedStatus("Missing one of (Prometheus|target|nrpe|grafana-agent) relation(s)"),
)

def test_grafana_relation_without_dashboards_blocks(self):
Expand All @@ -187,7 +187,7 @@ def test_grafana_relation_without_dashboards_blocks(self):

self.assertEqual(
self.harness.model.unit.status,
BlockedStatus("Missing one of (Grafana|dashboard) relation(s)"),
BlockedStatus("Missing one of (Grafana|dashboard|grafana-agent) relation(s)"),
)

def test_dashboards_without_grafana_relations_blocks(self):
Expand All @@ -198,7 +198,7 @@ def test_dashboards_without_grafana_relations_blocks(self):

self.assertEqual(
self.harness.model.unit.status,
BlockedStatus("Missing one of (Grafana|dashboard) relation(s)"),
BlockedStatus("Missing one of (Grafana|dashboard|grafana-agent) relation(s)"),
)

def test_scrape_jobs_are_forwarded_on_adding_prometheus_then_targets(self):
Expand Down
16 changes: 14 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ deps =
charm: httpcore==0.14.7
commands =
charm: pyright {[vars]src_path} {posargs}
lib: pyright --pythonversion 3.5 {[vars]lib_path} {posargs}
lib: pyright --pythonversion 3.10 {[vars]lib_path} {posargs}

[testenv:unit]
description = Run unit tests
Expand Down Expand Up @@ -87,11 +87,23 @@ description = Run integration tests
description = Run scenario tests
deps =
pytest
ops-scenario == 5.4
ops-scenario>=6
coverage[toml]
cosl
-r{toxinidir}/requirements.txt
commands =
coverage run \
--source={[vars]src_path},{[vars]lib_path} \
-m pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tst_path}/scenario
coverage report

[testenv:interface]
description = Run interface tests
deps =
pytest
ops-scenario>=6
pytest-interface-tester
cosl
-r{toxinidir}/requirements.txt
commands =
pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tst_path}/interface

0 comments on commit 662559b

Please sign in to comment.