From 4a859f9c2f0f11cc4972e1176b0be7999d121eac Mon Sep 17 00:00:00 2001 From: Alberto Donato Date: Thu, 2 Jan 2025 10:21:11 +0100 Subject: [PATCH] feat: support YAML tags for env, file, and include (fixes #188) (#216) --- docs/configuration.rst | 37 ++++++++++----- examples/oracle-stats.yaml | 2 +- examples/postgresql-stats.yaml | 2 +- query_exporter/config.py | 13 ++++- query_exporter/yaml.py | 64 +++++++++++++++++++++++++ tests/config_test.py | 18 ++++++- tests/yaml_test.py | 87 ++++++++++++++++++++++++++++++++++ 7 files changed, 207 insertions(+), 16 deletions(-) create mode 100644 query_exporter/yaml.py create mode 100644 tests/yaml_test.py diff --git a/docs/configuration.rst b/docs/configuration.rst index 2365481..dd26d18 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -4,6 +4,29 @@ Configuration file format Configuration is provided as a YAML file, composed by a few sections, as described in the following sections. +The following tags are supported in the configuration file: + +``!include ``: + include the content of another YAML file. This allows modularizing + configuration. + + If the specified path is not absolute, it's considered relative to the + including file. + +``!file ``: + include the text content of a file as a string. + + If the specified path is not absolute, it's considered relative to the + including file. + +``!env ``: + expand to the value of the specified environment variable. + + Note that the value of the variable is interpreted as YAML (and thus JSON), + allowing for specifying values other than strings (e.g. integers/floats). + + The specified variable must be set. + ``databases`` section --------------------- @@ -43,17 +66,9 @@ Each database definitions can have the following keys: **Note**: in the string form, username, password and options need to be URL-encoded, whereas this is done automatically for the key/value form. - It's also possible to get the connection string indirectly from other sources: - - - from an environment variable (e.g. ``$CONNECTION_STRING``) by setting ``dsn`` to:: - - env:CONNECTION_STRING - - - from a file, containing only the DSN value, by setting ``dsn`` to:: - - file:/path/to/file - - These forms only support specifying the actual DNS in the string form. + **Note**: use of the ``env:`` and ``file:`` prefixes in the string form is + deprecated, and will be dropped in the 4.0 release. Use ``!env`` and + ``!file`` YAML tags instead. ``connect-sql``: An optional list of queries to run right after database connection. This can diff --git a/examples/oracle-stats.yaml b/examples/oracle-stats.yaml index d8ec507..94fab89 100644 --- a/examples/oracle-stats.yaml +++ b/examples/oracle-stats.yaml @@ -7,7 +7,7 @@ databases: oracle: - dsn: env:ORACLE_DATABASE_DSN + dsn: !env ORACLE_DATABASE_DSN metrics: oracle_sessions: diff --git a/examples/postgresql-stats.yaml b/examples/postgresql-stats.yaml index 3e48c8b..7574ffd 100644 --- a/examples/postgresql-stats.yaml +++ b/examples/postgresql-stats.yaml @@ -11,7 +11,7 @@ databases: pg: - dsn: env:PG_DATABASE_DSN + dsn: !env PG_DATABASE_DSN metrics: pg_process: diff --git a/query_exporter/config.py b/query_exporter/config.py index ed80880..7687fba 100644 --- a/query_exporter/config.py +++ b/query_exporter/config.py @@ -28,6 +28,7 @@ Query, QueryMetric, ) +from .yaml import load_yaml_config # metric for counting database errors DB_ERRORS_METRIC_NAME = "database_errors" @@ -105,8 +106,10 @@ def load_config( if logger is None: logger = structlog.get_logger() - with config_path.open() as fd: - data = defaultdict(dict, yaml.safe_load(fd)) + try: + data = defaultdict(dict, load_yaml_config(config_path)) + except yaml.scanner.ScannerError as e: + raise ConfigError(str(e)) _validate_config(data) databases, database_labels = _get_databases(data["databases"], env) extra_labels = frozenset([DATABASE_LABEL]) | database_labels @@ -347,6 +350,12 @@ def from_file(filename: str) -> str: source, value = dsn.split(":", 1) handler = origins.get(source) if handler is not None: + logger = structlog.get_logger() + logger.warn( + f"deprecated DSN source '{dsn}', use '!{source} {value}' instead", + source=source, + value=value, + ) return handler(value) return dsn diff --git a/query_exporter/yaml.py b/query_exporter/yaml.py new file mode 100644 index 0000000..ab3cad9 --- /dev/null +++ b/query_exporter/yaml.py @@ -0,0 +1,64 @@ +import os +from pathlib import Path +import typing as t + +import yaml + + +def load_yaml_config(path: Path) -> t.Any: + """Load a YAML document from a file.""" + + class ConfigLoader(yaml.SafeLoader): + """Subclass supporting tags.""" + + base_path: t.ClassVar[Path] + + def config_loader(path: Path) -> type[ConfigLoader]: + class ConfigLoaderWithPath(ConfigLoader): + base_path = path + + return ConfigLoaderWithPath + + def tag_env(loader: ConfigLoader, node: yaml.nodes.ScalarNode) -> t.Any: + env = loader.construct_scalar(node) + value = os.getenv(env) + if value is None: + raise yaml.scanner.ScannerError( + "while processing 'env' tag", + None, + f"variable {env} undefined", + loader.get_mark(), # type: ignore + ) + return yaml.safe_load(value) + + def tag_file(loader: ConfigLoader, node: yaml.nodes.ScalarNode) -> str: + path = loader.base_path / loader.construct_scalar(node) + if not path.is_file(): + raise yaml.scanner.ScannerError( + "while processing 'file' tag", + None, + f"file {path} not found", + loader.get_mark(), # type: ignore + ) + return path.read_text().strip() + + def tag_include( + loader: ConfigLoader, node: yaml.nodes.ScalarNode + ) -> t.Any: + path = loader.base_path / loader.construct_scalar(node) + if not path.is_file(): + raise yaml.scanner.ScannerError( + "while processing 'include' tag", + None, + f"file {path} not found", + loader.get_mark(), # type: ignore + ) + with path.open() as fd: + return yaml.load(fd, config_loader(path.parent)) + + ConfigLoader.add_constructor("!env", tag_env) + ConfigLoader.add_constructor("!file", tag_file) + ConfigLoader.add_constructor("!include", tag_include) + + with path.open() as fd: + return yaml.load(fd, config_loader(path.parent)) diff --git a/tests/config_test.py b/tests/config_test.py index 2e97570..e2d9085 100644 --- a/tests/config_test.py +++ b/tests/config_test.py @@ -128,6 +128,13 @@ def write(data: dict[str, t.Any]) -> Path: class TestLoadConfig: + def test_load_invalid_format(self, tmp_path: Path) -> None: + config_file = tmp_path / "config" + config_file.write_text("foo: !env UNSET") + with pytest.raises(ConfigError) as err: + load_config(config_file) + assert "variable UNSET undefined" in str(err.value) + def test_load_databases_section(self, write_config: ConfigWriter) -> None: cfg = { "databases": { @@ -156,7 +163,9 @@ def test_load_databases_section(self, write_config: ConfigWriter) -> None: assert not database2.autocommit def test_load_databases_dsn_from_env( - self, write_config: ConfigWriter + self, + log: StructuredLogCapture, + write_config: ConfigWriter, ) -> None: cfg = { "databases": {"db1": {"dsn": "env:FOO"}}, @@ -166,6 +175,9 @@ def test_load_databases_dsn_from_env( config_file = write_config(cfg) config = load_config(config_file, env={"FOO": "sqlite://"}) assert config.databases["db1"].dsn == "sqlite://" + assert log.has( + "deprecated DSN source 'env:FOO', use '!env FOO' instead" + ) def test_load_databases_missing_dsn( self, write_config: ConfigWriter @@ -262,6 +274,7 @@ def test_load_databases_dsn_undefined_env( def test_load_databases_dsn_from_file( self, tmp_path: Path, + log: StructuredLogCapture, write_config: ConfigWriter, ) -> None: dsn = "sqlite:///foo" @@ -275,6 +288,9 @@ def test_load_databases_dsn_from_file( config_file = write_config(cfg) config = load_config(config_file) assert config.databases["db1"].dsn == dsn + assert log.has( + f"deprecated DSN source 'file:{dsn_path}', use '!file {dsn_path}' instead" + ) def test_load_databases_dsn_from_file_not_found( self, write_config: ConfigWriter diff --git a/tests/yaml_test.py b/tests/yaml_test.py new file mode 100644 index 0000000..0560f65 --- /dev/null +++ b/tests/yaml_test.py @@ -0,0 +1,87 @@ +from pathlib import Path +from textwrap import dedent +import typing as t + +import pytest +import yaml + +from query_exporter.yaml import load_yaml_config + + +class TestLoadYAMLConfig: + def test_load(self, tmp_path: Path) -> None: + config = tmp_path / "config.yaml" + config.write_text( + dedent( + """ + a: b + c: d + """ + ) + ) + assert load_yaml_config(config) == {"a": "b", "c": "d"} + + @pytest.mark.parametrize("env_value", ["foo", 3, False, {"foo": "bar"}]) + def test_load_env( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, env_value: t.Any + ) -> None: + monkeypatch.setenv("FOO", yaml.dump(env_value)) + config = tmp_path / "config.yaml" + config.write_text("x: !env FOO") + assert load_yaml_config(config) == {"x": env_value} + + def test_load_env_not_found(self, tmp_path: Path) -> None: + config = tmp_path / "config.yaml" + config.write_text("x: !env FOO") + with pytest.raises(yaml.scanner.ScannerError) as err: + load_yaml_config(config) + assert "variable FOO undefined" in str(err.value) + + def test_load_file_relative_path(self, tmp_path: Path) -> None: + (tmp_path / "foo.txt").write_text("some text") + config = tmp_path / "config.yaml" + config.write_text("x: !file foo.txt") + assert load_yaml_config(config) == {"x": "some text"} + + def test_load_file_absolute_path(self, tmp_path: Path) -> None: + text_file = tmp_path / "foo.txt" + text_file.write_text("some text") + config = tmp_path / "config.yaml" + config.write_text(f"x: !file {text_file.absolute()!s}") + assert load_yaml_config(config) == {"x": "some text"} + + def test_load_file_not_found(self, tmp_path: Path) -> None: + config = tmp_path / "config.yaml" + config.write_text("x: !file not-here.txt") + with pytest.raises(yaml.scanner.ScannerError) as err: + load_yaml_config(config) + assert f"file {tmp_path / 'not-here.txt'} not found" in str(err.value) + + def test_load_include_relative_path(self, tmp_path: Path) -> None: + (tmp_path / "foo.yaml").write_text("foo: bar") + config = tmp_path / "config.yaml" + config.write_text("x: !include foo.yaml") + assert load_yaml_config(config) == {"x": {"foo": "bar"}} + + def test_load_include_absolute_path(self, tmp_path: Path) -> None: + other_file = tmp_path / "foo.yaml" + other_file.write_text("foo: bar") + config = tmp_path / "config.yaml" + config.write_text(f"x: !include {other_file.absolute()!s}") + assert load_yaml_config(config) == {"x": {"foo": "bar"}} + + def test_load_include_multiple(self, tmp_path: Path) -> None: + subdir = tmp_path / "subdir" + subdir.mkdir() + (subdir / "bar.yaml").write_text("[a, b, c]") + (subdir / "foo.yaml").write_text("foo: !include bar.yaml") + config = tmp_path / "config.yaml" + config.write_text("x: !include subdir/foo.yaml") + assert load_yaml_config(config) == {"x": {"foo": ["a", "b", "c"]}} + + def test_load_include_not_found(self, tmp_path: Path) -> None: + config = tmp_path / "config.yaml" + config.write_text("x: !include not-here.yaml") + with pytest.raises(yaml.scanner.ScannerError) as err: + load_yaml_config(config) + assert f"file {tmp_path / 'not-here.yaml'} not found" in str(err.value)