Skip to content

Commit

Permalink
Merge pull request #2 from mraspaud/add-scripts
Browse files Browse the repository at this point in the history
Add cli and git-based version
  • Loading branch information
mraspaud authored Apr 15, 2024
2 parents 13b82dd + e56e54c commit b4d79df
Show file tree
Hide file tree
Showing 8 changed files with 207 additions and 62 deletions.
15 changes: 15 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
exclude: '^$'
fail_fast: false

repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 'v0.3.7'
hooks:
- id: ruff
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: no-commit-to-branch
3 changes: 2 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
from pytroll_watchers.version import version

project = "pytroll-watchers"
copyright = "2024, Martin Raspaud"
author = "Martin Raspaud"
release = "0.1.0"
release = version

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
17 changes: 14 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
[project]
name = "pytroll-watchers"
version = "0.1.0"
dynamic = ["version"]
description = "Utility functions and scripts to watch for new files on local or remote filesystems."
authors = [
{ name = "Martin Raspaud", email = "[email protected]" }
]
dependencies = ["universal-pathlib", "trollsift"]
dependencies = ["universal-pathlib", "trollsift", "pyyaml"]
readme = "README.md"
requires-python = ">= 3.10"
license = {file = "LICENSE.txt"}

[project.scripts]
pytroll-watcher = "pytroll_watchers.main_interface:cli"

[project.urls]
"Documentation" = "https://pytroll-watchers.readthedocs.io/en/latest/"

Expand All @@ -20,7 +23,7 @@ publishing = ["posttroll"]
ssh = ["paramiko"]

[build-system]
requires = ["hatchling"]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[tool.rye]
Expand All @@ -33,6 +36,13 @@ allow-direct-references = true
[tool.hatch.build.targets.wheel]
packages = ["src/pytroll_watchers"]

[tool.hatch.version]
source = "vcs"

[tool.hatch.build.hooks.vcs]
version-file = "src/pytroll_watchers/version.py"


[tool.ruff]
line-length = 120

Expand All @@ -43,6 +53,7 @@ select = ["A", "B", "D", "E", "W", "F", "I", "N", "PT", "S", "TID", "C90", "Q",
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S101"] # assert allowed in tests
"docs/source/conf.py" = ["D100", "A001"] # sphinx misbihaving
"src/pytroll_watchers/version.py" = ["D100", "Q000"] # automatically generated by hatch-vcs

[tool.ruff.lint.pydocstyle]
convention = "google"
Expand Down
41 changes: 0 additions & 41 deletions src/pytroll_watchers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,42 +1 @@
"""Main package file for pytroll watchers."""

from pytroll_watchers.local_watcher import file_generator as local_generator
from pytroll_watchers.local_watcher import file_publisher as local_publisher
from pytroll_watchers.minio_notification_watcher import file_generator as minio_generator
from pytroll_watchers.minio_notification_watcher import file_publisher as minio_publisher


def get_publisher_for_backend(backend):
"""Get the right publisher for the given backend.
For the parameters to pass the returned function, check the individual backend documentation pages.
Example:
>>> file_publisher = get_publisher_for_backend("local")
>>> file_publisher(fs_config, publisher_config, message_config)
"""
if backend == "minio":
return minio_publisher
elif backend == "local":
return local_publisher
else:
raise ValueError(f"Unknown backend {backend}.")

def get_generator_for_backend(backend):
"""Get the right generator for the given backend.
For the parameters to pass the returned function, check the individual backend documentation pages.
Example:
>>> file_generator = get_generator_for_backend("local")
>>> for filename, file_metadata in file_generator("/tmp"):
... # do something with filename and file_metadata
"""
if backend == "minio":
return minio_generator
elif backend == "local":
return local_generator
else:
raise ValueError(f"Unknown backend {backend}.")
79 changes: 79 additions & 0 deletions src/pytroll_watchers/main_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Main interface functions."""

import argparse

import yaml

from pytroll_watchers.local_watcher import file_generator as local_generator
from pytroll_watchers.local_watcher import file_publisher as local_publisher
from pytroll_watchers.minio_notification_watcher import file_generator as minio_generator
from pytroll_watchers.minio_notification_watcher import file_publisher as minio_publisher


def get_publisher_for_backend(backend):
"""Get the right publisher for the given backend.
For the parameters to pass the returned function, check the individual backend documentation pages.
Example:
>>> file_publisher = get_publisher_for_backend("local")
>>> file_publisher(fs_config, publisher_config, message_config)
"""
if backend == "minio":
return minio_publisher
elif backend == "local":
return local_publisher
else:
raise ValueError(f"Unknown backend {backend}.")

def get_generator_for_backend(backend):
"""Get the right generator for the given backend.
For the parameters to pass the returned function, check the individual backend documentation pages.
Example:
>>> file_generator = get_generator_for_backend("local")
>>> for filename, file_metadata in file_generator("/tmp"):
... # do something with filename and file_metadata
"""
if backend == "minio":
return minio_generator
elif backend == "local":
return local_generator
else:
raise ValueError(f"Unknown backend {backend}.")


def publish_from_config(config):
"""Publish files/objects given a config.
Args:
config: a dictionary containing the `backend` string (`local` or `minio`), and `fs_config`, `publisher_config`
and `message_config` dictionaries.
"""
if config["backend"] == "local":
return local_publisher(config["fs_config"], config["publisher_config"], config["message_config"])
elif config["backend"] == "minio":
return minio_publisher(config["fs_config"], config["publisher_config"], config["message_config"])
else:
raise ValueError(f"Unknown backend {config['backend']}")

def cli(args=None):
"""Command-line interface for pytroll-watchers."""
parser = argparse.ArgumentParser(
prog="pytroll-watcher",
description="Watches the appearance of new files/objects on different filesystems.",
epilog="Thanks for using pytroll-watchers!")

parser.add_argument("config", type=str, help="The yaml config file.")

parsed = parser.parse_args(args)

config_file = parsed.config

with open(config_file) as fd:
config_dict = yaml.safe_load(fd.read())

return publish_from_config(config_dict)
22 changes: 11 additions & 11 deletions src/pytroll_watchers/minio_notification_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


def file_publisher(fs_config, publisher_config, message_config):
"""Publish files coming from bucket notifications.
"""Publish objects coming from bucket notifications.
Args:
fs_config: the configuration for the filesystem watching, will be passed as argument to `file_generator`.
Expand All @@ -19,17 +19,17 @@ def file_publisher(fs_config, publisher_config, message_config):


def file_generator(endpoint_url, bucket_name, file_pattern=None, storage_options=None):
"""Generate new files appearing in the watched directory.
"""Generate new objects appearing in the watched bucket.
Args:
endpoint_url: The endpoint_url to use.
bucket_name: The bucket to watch for changes.
file_pattern: The trollsift pattern to use for matching and extracting metadata from the filename.
This must not include the directory.
file_pattern: The trollsift pattern to use for matching and extracting metadata from the object name.
This must not include the prefix.
storage_options: The storage options for the service, for example for specifying a profile to the aws config.
Returns:
A tuple of UPath and file metadata.
A tuple of UPath and metadata.
Examples:
To iterate over new files in `s3:///tmp/`:
Expand All @@ -40,25 +40,25 @@ def file_generator(endpoint_url, bucket_name, file_pattern=None, storage_options
UPath("s3:///tmp/20200428_1000_foo.tif")
"""
file_metadata = {}
object_metadata = {}

if storage_options is None:
storage_options = {}
for record in _record_generator(endpoint_url, bucket_name, storage_options):
for item in record["Records"]:
new_bucket_name = item["s3"]["bucket"]["name"]
new_file_name = item["s3"]["object"]["key"]
object_name = item["s3"]["object"]["key"]
try:
file_metadata = parse_metadata(file_pattern, new_file_name)
object_metadata = parse_metadata(file_pattern, object_name)
except ValueError:
continue

path = UPath(f"s3://{new_bucket_name}/{new_file_name}", **storage_options)
yield path, file_metadata
path = UPath(f"s3://{new_bucket_name}/{object_name}", **storage_options)
yield path, object_metadata


def _record_generator(endpoint_url, bucket_name, profile=None):
"""Generate records for new files in the bucket."""
"""Generate records for new objects in the bucket."""
from minio import Minio
from minio.credentials.providers import AWSConfigProvider

Expand Down
6 changes: 1 addition & 5 deletions src/pytroll_watchers/testing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Utilities for testing code that uses pytroll watchers."""
"""Pytest fixtures for testing code that uses pytroll watchers."""

from contextlib import contextmanager, nullcontext

Expand Down Expand Up @@ -46,8 +46,4 @@ def fake_listen(*args, **kwargs):
import minio
monkeypatch.setattr(minio.Minio, "listen_bucket_notification", fake_listen)
yield
#fake_minio = mock.Mock(wraps=minio.Minio)
#fake_minio.return_value.listen_bucket_notification.return_value = nullcontext(enter_result=records)
#with mock.patch("minio.Minio", fake_minio):
# yield
return _patched_bucket_listener
86 changes: 85 additions & 1 deletion tests/test_main_interface.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
"""Tests for the gathered publisher functions."""

import pytest
from pytroll_watchers import get_generator_for_backend, get_publisher_for_backend
import yaml
from posttroll.testing import patched_publisher
from pytroll_watchers.local_watcher import file_generator as local_generator
from pytroll_watchers.local_watcher import file_publisher as local_publisher
from pytroll_watchers.main_interface import (
cli,
get_generator_for_backend,
get_publisher_for_backend,
publish_from_config,
)
from pytroll_watchers.minio_notification_watcher import file_generator as minio_generator
from pytroll_watchers.minio_notification_watcher import file_publisher as minio_publisher
from pytroll_watchers.testing import patched_bucket_listener, patched_local_events # noqa


def test_getting_right_publisher():
Expand All @@ -25,3 +33,79 @@ def test_getting_right_generator():
assert generator == local_generator
with pytest.raises(ValueError, match="Unknown backend"):
_ = get_generator_for_backend("some_other_backend")


def test_pass_config_to_file_publisher_for_local_backend(tmp_path, patched_local_events): # noqa
"""Test passing a config to create a file publisher from a local fs."""
local_settings = dict(directory=tmp_path)
publisher_settings = dict(nameservers=False, port=1979)
message_settings = dict(subject="/segment/viirs/l1b/", atype="file", data=dict(sensor="viirs"))
config = dict(backend="local",
fs_config=local_settings,
publisher_config=publisher_settings,
message_config=message_settings)
with patched_publisher() as msgs:
filename = tmp_path / "bla"
with patched_local_events([filename]):
publish_from_config(config)
assert len(msgs) == 1
assert str(filename) in msgs[0]


def test_pass_config_to_object_publisher_for_minio_backend(patched_bucket_listener): # noqa
"""Test passing a config to create an objec publisher from minio bucket."""
s3_settings = dict(endpoint_url="someendpoint",
bucket_name="viirs-data",
storage_options=dict())
publisher_settings = dict(nameservers=False, port=1979)
message_settings = dict(subject="/segment/viirs/l1b/", atype="file", data=dict(sensor="viirs"))
config = dict(backend="minio",
fs_config=s3_settings,
publisher_config=publisher_settings,
message_config=message_settings)

records = [{"Records": [{
"eventName": "s3:ObjectCreated:Put",
"s3": {"bucket": {"arn": "arn:aws:s3:::viirs-data",
"name": "viirs-data",
"ownerIdentity": {"principalId": "someuser"}},
"object": {"contentType": "application/x-hdf5",
"key": "sdr/bla.h5",
"size": 22183568,
"userMetadata": {"content-type": "application/x-hdf5"}}}}]}]


with patched_publisher() as msgs:
with patched_bucket_listener(records):
publish_from_config(config)
assert len(msgs) == 1
assert str("sdr/bla.h5") in msgs[0]


def test_pass_config_to_file_publisher_for_spurious_backend():
"""Test that spurious backend fails."""
config = {}
config["backend"] = "some_other_backend"
with pytest.raises(ValueError, match="Unknown backend"):
publish_from_config(config)

def test_cli(tmp_path, patched_local_events): # noqa
"""Test the command-line interface."""
local_settings = dict(directory=str(tmp_path))
publisher_settings = dict(nameservers=False, port=1979)
message_settings = dict(subject="/segment/viirs/l1b/", atype="file", data=dict(sensor="viirs"))
config = dict(backend="local",
fs_config=local_settings,
publisher_config=publisher_settings,
message_config=message_settings)

config_file = tmp_path / "config.yaml"
with open(config_file, "w") as fd:
fd.write(yaml.dump(config))

with patched_publisher() as msgs:
filename = tmp_path / "bla"
with patched_local_events([filename]):
cli([str(config_file)])
assert len(msgs) == 1
assert str(filename) in msgs[0]

0 comments on commit b4d79df

Please sign in to comment.