-
Notifications
You must be signed in to change notification settings - Fork 16
/
loki_push_api.py
2823 lines (2301 loc) · 110 KB
/
loki_push_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
#
# Learn more at: https://juju.is/docs/sdk
r"""## Overview.
This document explains how to use the principal objects this library provides:
- `LokiPushApiProvider`: This object is meant to be used by any Charmed Operator that needs to
implement the provider side of the `loki_push_api` relation interface. For instance, a Loki charm.
The provider side of the relation represents the server side, to which logs are being pushed.
- `LokiPushApiConsumer`: This object is meant to be used by any Charmed Operator that needs to
send logs to Loki by implementing the consumer side of the `loki_push_api` relation interface.
For instance, a Promtail or Grafana agent charm which needs to send logs to Loki.
- `LogProxyConsumer`: DEPRECATED.
This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to
Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation
interface.
In order to be able to control the labels on the logs pushed this object adds a Pebble layer
that runs Promtail in the workload container, injecting Juju topology labels into the
logs on the fly.
This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 LTS.
- `LogForwarder`: This object can be used by any Charmed Operator which needs to send the workload
standard output (stdout) through Pebble's log forwarding mechanism, to Loki endpoints through the
`loki_push_api` relation interface.
In order to be able to control the labels on the logs pushed this object updates the pebble layer's
"log-targets" section with Juju topology.
Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju
topology labels are used to uniquely identify the workload which generates telemetry like logs.
## LokiPushApiProvider Library Usage
This object may be used by any Charmed Operator which implements the `loki_push_api` interface.
For instance, Loki or Grafana Agent.
For this purpose a charm needs to instantiate the `LokiPushApiProvider` object with one mandatory
and three optional arguments.
- `charm`: A reference to the parent (Loki) charm.
- `relation_name`: The name of the relation that the charm uses to interact
with its clients, which implement `LokiPushApiConsumer`, `LogForwarder`, or `LogProxyConsumer`
(note that LogProxyConsumer is deprecated).
If provided, this relation name must match a provided relation in metadata.yaml with the
`loki_push_api` interface.
The default relation name is "logging" for `LokiPushApiConsumer` and `LogForwarder`, and
"log-proxy" for `LogProxyConsumer` (note that LogProxyConsumer is deprecated).
For example, a provider's `metadata.yaml` file may look as follows:
```yaml
provides:
logging:
interface: loki_push_api
```
Subsequently, a Loki charm may instantiate the `LokiPushApiProvider` in its constructor as
follows:
from charms.loki_k8s.v1.loki_push_api import LokiPushApiProvider
from loki_server import LokiServer
...
class LokiOperatorCharm(CharmBase):
...
def __init__(self, *args):
super().__init__(*args)
...
external_url = urlparse(self._external_url)
self.loki_provider = LokiPushApiProvider(
self,
address=external_url.hostname or self.hostname,
port=external_url.port or 80,
scheme=external_url.scheme,
path=f"{external_url.path}/loki/api/v1/push",
)
...
- `port`: Loki Push Api endpoint port. Default value: `3100`.
- `scheme`: Loki Push Api endpoint scheme (`HTTP` or `HTTPS`). Default value: `HTTP`
- `address`: Loki Push Api endpoint address. Default value: `localhost`
- `path`: Loki Push Api endpoint path. Default value: `loki/api/v1/push`
The `LokiPushApiProvider` object has several responsibilities:
1. Set the URL of the Loki Push API in the relation application data bag; the URL
must be unique to all instances (e.g. using a load balancer).
2. Set the Promtail binary URL (`promtail_binary_zip_url`) so clients that use
`LogProxyConsumer` object could download and configure it.
3. Process the metadata of the consumer application, provided via the
"metadata" field of the consumer data bag, which are used to annotate the
alert rules (see next point). An example for "metadata" is the following:
{'model': 'loki',
'model_uuid': '0b7d1071-ded2-4bf5-80a3-10a81aeb1386',
'application': 'promtail-k8s'
}
4. Process alert rules set into the relation by the `LokiPushApiConsumer`
objects, e.g.:
'{
"groups": [{
"name": "loki_0b7d1071-ded2-4bf5-80a3-10a81aeb1386_promtail-k8s_alerts",
"rules": [{
"alert": "HighPercentageError",
"expr": "sum(rate({app=\\"foo\\", env=\\"production\\"} |= \\"error\\" [5m]))
by (job) \\n /\\nsum(rate({app=\\"foo\\", env=\\"production\\"}[5m]))
by (job)\\n > 0.05
\\n", "for": "10m",
"labels": {
"severity": "page",
"juju_model": "loki",
"juju_model_uuid": "0b7d1071-ded2-4bf5-80a3-10a81aeb1386",
"juju_application": "promtail-k8s"
},
"annotations": {
"summary": "High request latency"
}
}]
}]
}'
Once these alert rules are sent over relation data, the `LokiPushApiProvider` object
stores these files in the directory `/loki/rules` inside the Loki charm container. After
storing alert rules files, the object will check alert rules by querying Loki API
endpoint: [`loki/api/v1/rules`](https://grafana.com/docs/loki/latest/api/#list-rule-groups).
If there are changes in the alert rules a `loki_push_api_alert_rules_changed` event will
be emitted with details about the `RelationEvent` which triggered it.
This event should be observed in the charm that uses `LokiPushApiProvider`:
```python
def __init__(self, *args):
super().__init__(*args)
...
self.loki_provider = LokiPushApiProvider(self)
self.framework.observe(
self.loki_provider.on.loki_push_api_alert_rules_changed,
self._loki_push_api_alert_rules_changed,
)
```
## LokiPushApiConsumer Library Usage
This Loki charm interacts with its clients using the Loki charm library. Charms
seeking to send logs to Loki must do so using the `LokiPushApiConsumer` object from
this charm library.
> **NOTE**: `LokiPushApiConsumer` also depends on an additional charm library.
>
> Make sure you run `charmcraft fetch-lib charms.observability_libs.v0.juju_topology`
> when using this library.
For the simplest use cases, using the `LokiPushApiConsumer` object only requires
instantiating it, typically in the constructor of your charm (the one which
sends logs).
```python
from charms.loki_k8s.v1.loki_push_api import LokiPushApiConsumer
class LokiClientCharm(CharmBase):
def __init__(self, *args):
super().__init__(*args)
...
self._loki_consumer = LokiPushApiConsumer(self)
```
The `LokiPushApiConsumer` constructor requires two things:
- A reference to the parent (LokiClientCharm) charm.
- Optionally, the name of the relation that the Loki charm uses to interact
with its clients. If provided, this relation name must match a required
relation in metadata.yaml with the `loki_push_api` interface.
This argument is not required if your metadata.yaml has precisely one
required relation in metadata.yaml with the `loki_push_api` interface, as the
lib will automatically resolve the relation name by inspecting the meta information
of the charm.
Any time the relation between a Loki provider charm and a Loki consumer charm is
established, a `LokiPushApiEndpointJoined` event is fired. On the consumer side
it is possible to observe this event with:
```python
self.framework.observe(
self._loki_consumer.on.loki_push_api_endpoint_joined,
self._on_loki_push_api_endpoint_joined,
)
```
Any time there are departures in relations between the consumer charm and Loki
the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event, for instance:
```python
self.framework.observe(
self._loki_consumer.on.loki_push_api_endpoint_departed,
self._on_loki_push_api_endpoint_departed,
)
```
The consumer charm can then choose to update its configuration in both situations.
Note that LokiPushApiConsumer does not add any labels automatically on its own. In
order to better integrate with the Canonical Observability Stack, you may want to configure your
software to add Juju topology labels. The
[observability-libs](https://charmhub.io/observability-libs) library can be used to get topology
labels in charm code. See :func:`LogProxyConsumer._scrape_configs` for an example of how
to do this with promtail.
## LogProxyConsumer Library Usage
> Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6
> LTS.
Let's say that we have a workload charm that produces logs, and we need to send those logs to a
workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`.
Adopting this object in a Charmed Operator consists of two steps:
1. Use the `LogProxyConsumer` class by instantiating it in the `__init__` method of the charmed
operator. There are two ways to get logs in to promtail. You can give it a list of files to
read, or you can write to it using the syslog protocol.
For example:
```python
from charms.loki_k8s.v1.loki_push_api import LogProxyConsumer
...
def __init__(self, *args):
...
self._log_proxy = LogProxyConsumer(
self,
logs_scheme={
"workload-a": {
"log-files": ["/tmp/worload-a-1.log", "/tmp/worload-a-2.log"],
"syslog-port": 1514,
},
"workload-b": {"log-files": ["/tmp/worload-b.log"], "syslog-port": 1515},
},
relation_name="log-proxy",
)
self.framework.observe(
self._log_proxy.on.promtail_digest_error,
self._promtail_error,
)
def _promtail_error(self, event):
logger.error(event.message)
self.unit.status = BlockedStatus(event.message)
```
Any time the relation between a provider charm and a LogProxy consumer charm is
established, a `LogProxyEndpointJoined` event is fired. On the consumer side it is
possible to observe this event with:
```python
self.framework.observe(
self._log_proxy.on.log_proxy_endpoint_joined,
self._on_log_proxy_endpoint_joined,
)
```
Any time there are departures in relations between the consumer charm and the provider
the consumer charm is informed, through a `LogProxyEndpointDeparted` event, for instance:
```python
self.framework.observe(
self._log_proxy.on.log_proxy_endpoint_departed,
self._on_log_proxy_endpoint_departed,
)
```
The consumer charm can then choose to update its configuration in both situations.
Note that:
- You can configure your syslog software using `localhost` as the address and the method
`LogProxyConsumer.syslog_port("container_name")` to get the port, or, alternatively, if you are using rsyslog
you may use the method `LogProxyConsumer.rsyslog_config("container_name")`.
2. Modify the `metadata.yaml` file to add:
- The `log-proxy` relation in the `requires` section:
```yaml
requires:
log-proxy:
interface: loki_push_api
optional: true
```
Once the library is implemented in a Charmed Operator and a relation is established with
the charm that implements the `loki_push_api` interface, the library will inject a
Pebble layer that runs Promtail in the workload container to send logs.
By default, the promtail binary injected into the container will be downloaded from the internet.
If, for any reason, the container has limited network access, you may allow charm administrators
to provide their own promtail binary at runtime by adding the following snippet to your charm
metadata:
```yaml
resources:
promtail-bin:
type: file
description: Promtail binary for logging
filename: promtail-linux
```
Which would then allow operators to deploy the charm this way:
```
juju deploy \
./your_charm.charm \
--resource promtail-bin=/tmp/promtail-linux-amd64
```
If a different resource name is used, it can be specified with the `promtail_resource_name`
argument to the `LogProxyConsumer` constructor.
The object can emit a `PromtailDigestError` event:
- Promtail binary cannot be downloaded.
- The sha256 sum mismatch for promtail binary.
The object can raise a `ContainerNotFoundError` event:
- No `container_name` parameter has been specified and the Pod has more than 1 container.
The `PromtailDigestError` events can be observed with:
```python
self.framework.observe(
self._loki_consumer.on.promtail_digest_error,
self._promtail_error,
)
def _promtail_error(self, event):
logger.error(event.message)
self.unit.status = BlockedStatus(event.message)
```
## LogForwarder class Usage
Let's say that we have a charm's workload that writes logs to the standard output (stdout),
and we need to send those logs to a workload implementing the `loki_push_api` interface,
such as `Loki` or `Grafana Agent`. To know how to reach a Loki instance, a charm would
typically use the `loki_push_api` interface.
Use the `LogForwarder` class by instantiating it in the `__init__` method of the charm:
```python
from charms.loki_k8s.v1.loki_push_api import LogForwarder
...
def __init__(self, *args):
...
self._log_forwarder = LogForwarder(
self,
relation_name="logging" # optional, defaults to `logging`
)
```
The `LogForwarder` by default will observe relation events on the `logging` endpoint and
enable/disable log forwarding automatically.
Next, modify the `metadata.yaml` file to add:
The `logging` relation in the `requires` section:
```yaml
requires:
logging:
interface: loki_push_api
optional: true
```
Once the LogForwarder class is implemented in your charm and the relation (implementing the
`loki_push_api` interface) is active and healthy, the library will inject a Pebble layer in
each workload container the charm has access to, to configure Pebble's log forwarding
feature and start sending logs to Loki.
## Alerting Rules
This charm library also supports gathering alerting rules from all related Loki client
charms and enabling corresponding alerts within the Loki charm. Alert rules are
automatically gathered by `LokiPushApiConsumer` object from a directory conventionally
named `loki_alert_rules`.
This directory must reside at the top level in the `src` folder of the
consumer charm. Each file in this directory is assumed to be a single alert rule
in YAML format. The file name must have the `.rule` extension.
The format of this alert rule conforms to the
[Loki docs](https://grafana.com/docs/loki/latest/rules/#alerting-rules).
An example of the contents of one such file is shown below.
```yaml
alert: HighPercentageError
expr: |
sum(rate({%%juju_topology%%} |= "error" [5m])) by (job)
/
sum(rate({%%juju_topology%%}[5m])) by (job)
> 0.05
for: 10m
labels:
severity: page
annotations:
summary: High request latency
```
It is **critical** to use the `%%juju_topology%%` filter in the expression for the alert
rule shown above. This filter is a stub that is automatically replaced by the
`LokiPushApiConsumer` following Loki Client's Juju topology (application, model and its
UUID). Such a topology filter is essential to ensure that alert rules submitted by one
provider charm generates alerts only for that same charm.
The Loki charm may be related to multiple Loki client charms. Without this filter,
rules submitted by one provider charm will also result in corresponding alerts for other
provider charms. Hence, every alert rule expression must include such a topology filter stub.
Gathering alert rules and generating rule files within the Loki charm is easily done using
the `alerts()` method of `LokiPushApiProvider`. Alerts generated by Loki will automatically
include Juju topology labels in the alerts. These labels indicate the source of the alert.
The following labels are automatically added to every alert
- `juju_model`
- `juju_model_uuid`
- `juju_application`
If an alert rules file does not contain the keys `alert` or `expr`, or there is no alert
rules file in `alert_rules_path`, a `loki_push_api_alert_rules_error` event is emitted.
To handle these situations the event must be observed in the `LokiClientCharm` charm.py file:
```python
class LokiClientCharm(CharmBase):
def __init__(self, *args):
super().__init__(*args)
...
self._loki_consumer = LokiPushApiConsumer(self)
self.framework.observe(
self._loki_consumer.on.loki_push_api_alert_rules_error,
self._alert_rules_error
)
def _alert_rules_error(self, event):
self.unit.status = BlockedStatus(event.message)
```
## Relation Data
The Loki charm uses both application and unit relation data to obtain information regarding
Loki Push API and alert rules.
Units of consumer charm send their alert rules over app relation data using the `alert_rules`
key.
## Charm logging
The `charms.loki_k8s.v0.charm_logging` library can be used in conjunction with this one to configure python's
logging module to forward all logs to Loki via the loki-push-api interface.
```python
from lib.charms.loki_k8s.v0.charm_logging import log_charm
from lib.charms.loki_k8s.v1.loki_push_api import charm_logging_config, LokiPushApiConsumer
@log_charm(logging_endpoint="my_endpoints", server_cert="cert_path")
class MyCharm(...):
_cert_path = "/path/to/cert/on/charm/container.crt"
def __init__(self, ...):
self.logging = LokiPushApiConsumer(...)
self.my_endpoints, self.cert_path = charm_logging_config(
self.logging, self._cert_path)
```
Do this, and all charm logs will be forwarded to Loki as soon as a relation is formed.
"""
import json
import logging
import os
import platform
import re
import socket
import subprocess
import tempfile
import typing
from copy import deepcopy
from gzip import GzipFile
from hashlib import sha256
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib import request
from urllib.error import URLError
import yaml
from cosl import JujuTopology
from ops.charm import (
CharmBase,
HookEvent,
PebbleReadyEvent,
RelationBrokenEvent,
RelationCreatedEvent,
RelationDepartedEvent,
RelationEvent,
RelationJoinedEvent,
RelationRole,
WorkloadEvent,
)
from ops.framework import EventBase, EventSource, Object, ObjectEvents
from ops.jujuversion import JujuVersion
from ops.model import Container, ModelError, Relation
from ops.pebble import APIError, ChangeError, Layer, PathError, ProtocolError
# --- Charm library metadata -------------------------------------------------
# The unique Charmhub library identifier, never change it
LIBID = "bf76f23cdd03464b877c52bd1d2f563e"
# Increment this major API version when introducing breaking changes
LIBAPI = 1
# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 13
# Third-party Python packages this library imports (see `from cosl import JujuTopology`).
# NOTE(review): presumably consumed by charmcraft tooling to install dependencies - confirm.
PYDEPS = ["cosl"]
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# --- Relation defaults ------------------------------------------------------
# Interface name that relations handled by this library must declare.
RELATION_INTERFACE_NAME = "loki_push_api"
# Default relation name for `LokiPushApiConsumer` and `LogForwarder` (per the module docstring).
DEFAULT_RELATION_NAME = "logging"
# Conventional location of the alert rule files, relative to the charm root.
DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/loki_alert_rules"
# Default relation name for the deprecated `LogProxyConsumer` (per the module docstring).
DEFAULT_LOG_PROXY_RELATION_NAME = "log-proxy"
# --- Promtail binary download -----------------------------------------------
PROMTAIL_BASE_URL = "https://github.com/canonical/loki-k8s-operator/releases/download"
# To update Promtail version you only need to change the PROMTAIL_VERSION and
# update all sha256 sums in PROMTAIL_BINARIES. To support a new architecture
# you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES.
PROMTAIL_VERSION = "v2.9.7"
# Shared entry for 64-bit ARM; referenced under two architecture names below.
# NOTE(review): "zipsha" / "binsha" look like the sha256 sums of the compressed download and
# of the extracted binary respectively - confirm against the code that verifies them.
PROMTAIL_ARM_BINARY = {
"filename": "promtail-static-arm64",
"zipsha": "c083fdb45e5c794103f974eeb426489b4142438d9e10d0ae272b2aff886e249b",
"binsha": "4cd055c477a301c0bdfdbcea514e6e93f6df5d57425ce10ffc77f3e16fec1ddf",
}
# Maps an architecture name to its Promtail binary description.
# "arm64" and "aarch64" deliberately point at the same dictionary object.
PROMTAIL_BINARIES = {
"amd64": {
"filename": "promtail-static-amd64",
"zipsha": "6873cbdabf23062aeefed6de5f00ff382710332af3ab90a48c253ea17e08f465",
"binsha": "28da9b99f81296fe297831f3bc9d92aea43b4a92826b8ff04ba433b8cb92fb50",
},
"arm64": PROMTAIL_ARM_BINARY,
"aarch64": PROMTAIL_ARM_BINARY,
}
# Paths in `charm` container
BINARY_DIR = "/tmp"
# Paths in `workload` container
WORKLOAD_BINARY_DIR = "/opt/promtail"
WORKLOAD_CONFIG_DIR = "/etc/promtail"
WORKLOAD_CONFIG_FILE_NAME = "promtail_config.yaml"
# Resolves to "/etc/promtail/promtail_config.yaml".
WORKLOAD_CONFIG_PATH = "{}/{}".format(WORKLOAD_CONFIG_DIR, WORKLOAD_CONFIG_FILE_NAME)
# Resolves to "/opt/promtail/positions.yaml" (note: built from the binary dir, not the config dir).
WORKLOAD_POSITIONS_PATH = "{}/positions.yaml".format(WORKLOAD_BINARY_DIR)
WORKLOAD_SERVICE_NAME = "promtail"
# These are the initial port values. As we can have more than one container,
# we use odd and even numbers to avoid collisions.
# Each new container adds 2 to the previous value.
HTTP_LISTEN_PORT_START = 9080 # even start port
GRPC_LISTEN_PORT_START = 9095 # odd start port
class LokiPushApiError(Exception):
    """Root of the exception hierarchy used by this module.

    It adds no behaviour of its own; it exists so that the more specific
    errors deriving from it can be caught with a single `except` clause.
    """
class RelationNotFoundError(LokiPushApiError):
    """Signals that no relation with the requested name exists.

    Attributes:
        relation_name: the relation name that could not be found.
        message: the human-readable text, also used as the exception message.
    """

    def __init__(self, relation_name: str):
        text = "No relation named '{}' found".format(relation_name)
        super().__init__(text)
        self.message = text
        self.relation_name = relation_name
class RelationInterfaceMismatchError(LokiPushApiError):
    """Signals that a relation exists but declares an unexpected interface.

    Attributes:
        relation_name: name of the relation that was inspected.
        expected_relation_interface: the interface name the caller asked for.
        actual_relation_interface: the interface name the relation declares.
        message: the human-readable text, also used as the exception message.
    """

    def __init__(
        self,
        relation_name: str,
        expected_relation_interface: str,
        actual_relation_interface: str,
    ):
        template = "The '{}' relation has '{}' as interface rather than the expected '{}'"
        # The actual interface is reported first, the expected one last.
        text = template.format(
            relation_name, actual_relation_interface, expected_relation_interface
        )
        super().__init__(text)
        self.message = text
        self.relation_name = relation_name
        self.expected_relation_interface = expected_relation_interface
        self.actual_relation_interface = actual_relation_interface
class RelationRoleMismatchError(LokiPushApiError):
    """Raised if the relation with the given name has a different direction.

    Attributes:
        relation_name: name of the relation that was inspected.
        expected_relation_role: the role the caller required.
        expected_relation_interface: legacy alias of `expected_relation_role`.
            The expected role used to be stored only under this (misleading)
            name; it is kept so existing readers of the attribute keep working.
        actual_relation_role: the role reported as the one actually declared.
        message: the human-readable text, also used as the exception message.
    """

    def __init__(
        self,
        relation_name: str,
        expected_relation_role: RelationRole,
        actual_relation_role: RelationRole,
    ):
        self.relation_name = relation_name
        self.expected_relation_role = expected_relation_role
        # Backward compatibility: this attribute never held an interface name.
        self.expected_relation_interface = expected_relation_role
        self.actual_relation_role = actual_relation_role
        # The actual role is reported first, the expected one last.
        self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format(
            relation_name, repr(actual_relation_role), repr(expected_relation_role)
        )
        super().__init__(self.message)
def _validate_relation_by_interface_and_direction(
    charm: CharmBase,
    relation_name: str,
    expected_relation_interface: str,
    expected_relation_role: RelationRole,
):
    """Check that a relation is declared with the expected interface and role.

    Three checks are performed, in this order, and the first failing one raises:
    (1) `relation_name` is declared in the charm metadata, (2) it declares
    `expected_relation_interface` as its interface, and (3) it is listed among the
    relations the charm provides or requires, as selected by `expected_relation_role`.

    Args:
        charm: the `CharmBase` object whose metadata is inspected.
        relation_name: the name of the relation to verify.
        expected_relation_interface: the interface name the relation must declare.
        expected_relation_role: `RelationRole.provides` or `RelationRole.requires`.

    Raises:
        RelationNotFoundError: if the charm metadata has no relation named `relation_name`.
        RelationInterfaceMismatchError: if the relation declares a different interface
            than `expected_relation_interface`.
        RelationRoleMismatchError: if the relation is not listed under the section
            selected by `expected_relation_role`.
        Exception: if `expected_relation_role` is neither provides nor requires.
    """
    declared_relations = charm.meta.relations
    if relation_name not in declared_relations:
        raise RelationNotFoundError(relation_name)

    found_interface = declared_relations[relation_name].interface_name
    if found_interface != expected_relation_interface:
        raise RelationInterfaceMismatchError(
            relation_name,
            expected_relation_interface,
            found_interface,  # pyright: ignore
        )

    # Pick the metadata section to look in, plus the (wanted, opposite) roles
    # that are reported if the relation is missing from that section.
    if expected_relation_role == RelationRole.provides:
        section = charm.meta.provides
        wanted, opposite = RelationRole.provides, RelationRole.requires
    elif expected_relation_role == RelationRole.requires:
        section = charm.meta.requires
        wanted, opposite = RelationRole.requires, RelationRole.provides
    else:
        raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role))

    if relation_name not in section:
        raise RelationRoleMismatchError(relation_name, wanted, opposite)
class InvalidAlertRulePathError(Exception):
    """Signals that the alert rules folder is missing or otherwise unusable.

    Attributes:
        alert_rules_absolute_path: the path that was rejected.
        message: the explanation supplied by the raiser; also the exception text.
    """

    def __init__(
        self,
        alert_rules_absolute_path: Path,
        message: str,
    ):
        super().__init__(message)
        self.message = message
        self.alert_rules_absolute_path = alert_rules_absolute_path
def _is_official_alert_rule_format(rules_dict: dict) -> bool:
"""Are alert rules in the upstream format as supported by Loki.
Alert rules in dictionary format are in "official" form if they
contain a "groups" key, since this implies they contain a list of
alert rule groups.
Args:
rules_dict: a set of alert rules in Python dictionary format
Returns:
True if alert rules are in official Loki file format.
"""
return "groups" in rules_dict
def _is_single_alert_rule_format(rules_dict: dict) -> bool:
"""Are alert rules in single rule format.
The Loki charm library supports reading of alert rules in a
custom format that consists of a single alert rule per file. This
does not conform to the official Loki alert rule file format
which requires that each alert rules file consists of a list of
alert rule groups and each group consists of a list of alert
rules.
Alert rules in dictionary form are considered to be in single rule
format if in the least it contains two keys corresponding to the
alert rule name and alert expression.
Returns:
True if alert rule is in single rule file format.
"""
# one alert rule per file
return set(rules_dict) >= {"alert", "expr"}
class AlertRules:
"""Utility class for amalgamating Loki alert rule files and injecting juju topology.
An `AlertRules` object supports aggregating alert rules from files and directories in both
official and single rule file formats using the `add_path()` method. All the alert rules
read are annotated with Juju topology labels and amalgamated into a single data structure
in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be
easily dumped into JSON format and exchanged over relation data. The dictionary can also
be dumped into YAML format and written directly into an alert rules file that is read by
Loki. Note that multiple `AlertRules` objects must not be written into the same file,
since Loki allows only a single list of alert rule groups per alert rules file.
The official Loki format is a YAML file conforming to the Loki documentation
(https://grafana.com/docs/loki/latest/api/#list-rule-groups).
The custom single rule format is a subsection of the official YAML, having a single alert
rule, effectively "one alert per file".
"""
# This class uses the following terminology for the various parts of a rule file:
# - alert rules file: the entire groups[] yaml, including the "groups:" key.
# - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list
# of dictionaries that have the "name" and "rules" keys.
# - alert group (singular): a single dictionary that has the "name" and "rules" keys.
# - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with
# the "alert" and "expr" keys.
# - alert rule (singular): a single dictionary that has the "alert" and "expr" keys.
def __init__(self, topology: Optional[JujuTopology] = None):
    """Build an alert rule object.

    The object starts with an empty list of alert groups.

    Args:
        topology: an optional `JujuTopology` instance; when given, it is used
            to annotate the alert rules read from files.
    """
    # Accumulates the alert groups gathered so far.
    self.alert_groups = []  # type: List[dict]
    # Helper used to inject label matchers into rule expressions.
    self.tool = CosTool(None)
    self.topology = topology
def _from_file(self, root_path: Path, file_path: Path) -> List[dict]:
"""Read a rules file from path, injecting juju topology.
Args:
root_path: full path to the root rules folder (used only for generating group name)
file_path: full path to a *.rule file.
Returns:
A list of dictionaries representing the rules file, if file is valid (the structure is
formed by `yaml.safe_load` of the file); an empty list otherwise.
"""
with file_path.open() as rf:
# Load a list of rules from file then add labels and filters
try:
rule_file = yaml.safe_load(rf) or {}
except Exception as e:
logger.error("Failed to read alert rules from %s: %s", file_path.name, e)
return []
if _is_official_alert_rule_format(rule_file):
alert_groups = rule_file["groups"]
elif _is_single_alert_rule_format(rule_file):
# convert to list of alert groups
# group name is made up from the file name
alert_groups = [{"name": file_path.stem, "rules": [rule_file]}]
else:
# invalid/unsupported
reason = "file is empty" if not rule_file else "unexpected file structure"
logger.error("Invalid rules file (%s): %s", reason, file_path.name)
return []
# update rules with additional metadata
for alert_group in alert_groups:
# update group name with topology and sub-path
alert_group["name"] = self._group_name(
str(root_path),
str(file_path),
alert_group["name"],
)
# add "juju_" topology labels
for alert_rule in alert_group["rules"]:
if "labels" not in alert_rule:
alert_rule["labels"] = {}
if self.topology:
# only insert labels that do not already exist
for label, val in self.topology.label_matcher_dict.items():
if label not in alert_rule["labels"]:
alert_rule["labels"][label] = val
# insert juju topology filters into a prometheus alert rule
# logql doesn't like empty matchers, so add a job matcher which hits
# any string as a "wildcard" which the topology labels will
# filter down
alert_rule["expr"] = self.tool.inject_label_matchers(
re.sub(r"%%juju_topology%%", r'job=~".+"', alert_rule["expr"]),
self.topology.label_matcher_dict,
)
return alert_groups
def _group_name(
self,
root_path: typing.Union[Path, str],
file_path: typing.Union[Path, str],
group_name: str,
) -> str:
"""Generate group name from path and topology.
The group name is made up of the relative path between the root dir_path, the file path,
and topology identifier.
Args:
root_path: path to the root rules dir.
file_path: path to rule file.
group_name: original group name to keep as part of the new augmented group name
Returns:
New group name, augmented by juju topology and relative path.
"""
file_path = Path(file_path) if not isinstance(file_path, Path) else file_path
root_path = Path(root_path) if not isinstance(root_path, Path) else root_path
rel_path = file_path.parent.relative_to(root_path.as_posix())
# We should account for both absolute paths and Windows paths. Convert it to a POSIX
# string, strip off any leading /, then join it
path_str = ""
if not rel_path == Path("."):
# Get rid of leading / and optionally drive letters so they don't muck up
# the template later, since Path.parts returns them. The 'if relpath.is_absolute ...'
# isn't even needed since re.sub doesn't throw exceptions if it doesn't match, so it's
# optional, but it makes it clear what we're doing.
# Note that Path doesn't actually care whether the path is valid just to instantiate
# the object, so we can happily strip that stuff out to make templating nicer
rel_path = Path(
re.sub(r"^([A-Za-z]+:)?/", "", rel_path.as_posix())
if rel_path.is_absolute()
else str(rel_path)
)
# Get rid of relative path characters in the middle which both os.path and pathlib
# leave hanging around. We could use path.resolve(), but that would lead to very
# long template strings when rules come from pods and/or other deeply nested charm
# paths
path_str = "_".join(filter(lambda x: x not in ["..", "/"], rel_path.parts))
# Generate group name:
# - name, from juju topology
# - suffix, from the relative path of the rule file;
group_name_parts = [self.topology.identifier] if self.topology else []
group_name_parts.extend([path_str, group_name, "alerts"])
# filter to remove empty strings
return "_".join(filter(lambda x: x, group_name_parts))
@classmethod
def _multi_suffix_glob(
cls, dir_path: Path, suffixes: List[str], recursive: bool = True
) -> list:
"""Helper function for getting all files in a directory that have a matching suffix.
Args:
dir_path: path to the directory to glob from.
suffixes: list of suffixes to include in the glob (items should begin with a period).
recursive: a flag indicating whether a glob is recursive (nested) or not.
Returns:
List of files in `dir_path` that have one of the suffixes specified in `suffixes`.
"""
all_files_in_dir = dir_path.glob("**/*" if recursive else "*")
return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir))
def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]:
"""Read all rule files in a directory.
All rules from files for the same directory are loaded into a single
group. The generated name of this group includes juju topology.
By default, only the top directory is scanned; for nested scanning, pass `recursive=True`.
Args:
dir_path: directory containing *.rule files (alert rules without groups).
recursive: flag indicating whether to scan for rule files recursively.
Returns:
a list of dictionaries representing prometheus alert rule groups, each dictionary
representing an alert group (structure determined by `yaml.safe_load`).
"""
alert_groups = [] # type: List[dict]
# Gather all alerts into a list of groups
for file_path in self._multi_suffix_glob(dir_path, [".rule", ".rules"], recursive):
alert_groups_from_file = self._from_file(dir_path, file_path)
if alert_groups_from_file:
logger.debug("Reading alert rule from %s", file_path)
alert_groups.extend(alert_groups_from_file)
return alert_groups
def add_path(self, path_str: str, *, recursive: bool = False):
"""Add rules from a dir path.
All rules from files are aggregated into a data structure representing a single rule file.
All group names are augmented with juju topology.
Args:
path_str: either a rules file or a dir of rules files.
recursive: whether to read files recursively or not (no impact if `path` is a file).
Raises:
InvalidAlertRulePathError: if the provided path is invalid.
"""
path = Path(path_str) # type: Path
if path.is_dir():
self.alert_groups.extend(self._from_dir(path, recursive))
elif path.is_file():
self.alert_groups.extend(self._from_file(path.parent, path))
else:
logger.debug("The alerts file does not exist: %s", path)
def as_dict(self) -> dict:
"""Return standard alert rules file in dict representation.
Returns:
a dictionary containing a single list of alert rule groups.
The list of alert rule groups is provided as value of the
"groups" dictionary key.
"""
return {"groups": self.alert_groups} if self.alert_groups else {}
def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str:
"""Resolve the provided path items against the directory of the main file.
Look up the directory of the `main.py` file being executed. This is normally
going to be the charm.py file of the charm including this library. Then, resolve
the provided path elements and, if the result path exists and is a directory,
return its absolute path; otherwise, raise en exception.
Raises:
InvalidAlertRulePathError, if the path does not exist or is not a directory.
"""