diff --git a/metaphor/common/base_config.py b/metaphor/common/base_config.py index 00cf2428..b1c0a874 100644 --- a/metaphor/common/base_config.py +++ b/metaphor/common/base_config.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Optional, Type, TypeVar import yaml @@ -28,9 +29,27 @@ class BaseConfig: """ output: OutputConfig + """ + No default value here, otherwise dataclass inheritance would not work. + + If this field is omitted from the config file, the connector will store the extracted data to + `PWD`/`timestamp`. + + To disable storing data altogether, set this field to `OutputConfig()`. + """ @classmethod def from_yaml_file(cls: Type[T], path: str) -> T: with open(path, encoding="utf8") as fin: obj = yaml.safe_load(fin.read()) + + # So that users can simply omit this field from their config file. + if "output" not in obj: + obj["output"] = { + "file": { + "directory": Path.cwd() + .absolute() + .as_posix() # timestamp is added in file sink + } + } return TypeAdapter(cls).validate_python(variable_substitution(obj)) diff --git a/metaphor/common/docs/output.md b/metaphor/common/docs/output.md index 1a2c4498..eb2178ef 100644 --- a/metaphor/common/docs/output.md +++ b/metaphor/common/docs/output.md @@ -4,7 +4,9 @@ You can configure the connector to output to files or API. ## Output to Local Files -File-based output is the preferred way as it enables decoupling between the connector and ingestion pipeline. Add the following fragment to your config file: +File-based output is the preferred way as it enables decoupling between the connector and ingestion pipeline. By default, the connector will write to the directory `${pwd}/${CURRENT_TIMESTAMP}`. 
+ +To write the extracted data to a specific location, add the following fragment to your config file: ```yaml output: diff --git a/pyproject.toml b/pyproject.toml index ccc08802..73a2c58d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.13.110" +version = "0.13.111" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] diff --git a/tests/common/configs/missing.yml b/tests/common/configs/missing_output.yml similarity index 100% rename from tests/common/configs/missing.yml rename to tests/common/configs/missing_output.yml diff --git a/tests/common/test_config.py b/tests/common/test_config.py index 631f268c..308348df 100644 --- a/tests/common/test_config.py +++ b/tests/common/test_config.py @@ -23,9 +23,11 @@ def test_yaml_config(test_root_dir): ) -def test_yaml_config_with_missing_config(test_root_dir): - with pytest.raises(ValidationError): - BaseConfig.from_yaml_file(f"{test_root_dir}/common/configs/missing.yml") +def test_missing_output_config(test_root_dir): + missing_output = BaseConfig.from_yaml_file( + f"{test_root_dir}/common/configs/missing_output.yml" + ) + assert missing_output.output.file and missing_output.output.file.directory @dataclass(config=ConnectorConfig) @@ -36,7 +38,8 @@ class AnotherBaseConfig(BaseConfig): def test_yaml_config_with_extra_config(test_root_dir): # BaseConfig allows extras config = BaseConfig.from_yaml_file(f"{test_root_dir}/common/configs/extend.yml") - assert config == BaseConfig(output={}) + assert config == BaseConfig(output=OutputConfig()) + assert not config.output.file # AnotherBaseConfig does not allow extras with pytest.raises(ValidationError): @@ -50,4 +53,4 @@ class ExtendConfig(BaseConfig): def test_extend_config(test_root_dir): config = ExtendConfig.from_yaml_file(f"{test_root_dir}/common/configs/extend.yml") - assert 
config == ExtendConfig(foo="bar", output={}) + assert config == ExtendConfig(foo="bar", output=OutputConfig())