Skip to content

Commit

Permalink
Merge pull request #114 from adybbroe/add-s3stalker-daemon
Browse files Browse the repository at this point in the history
Add an s3stalker daemon
  • Loading branch information
pnuu authored Mar 20, 2023
2 parents c4bcfa8 + 456c73b commit 5b40ded
Show file tree
Hide file tree
Showing 15 changed files with 796 additions and 199 deletions.
74 changes: 62 additions & 12 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -1,36 +1,86 @@
name: Run tests
name: CI

on:
- push
- pull_request

jobs:
build:
runs-on: ubuntu-latest
test:
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.experimental }}
strategy:
fail-fast: true
matrix:
python-version: ["3.8", "3.9", "3.10"]
os: ["ubuntu-latest"]
python-version: ["3.9", "3.10", "3.11"]
experimental: [false]
include:
- python-version: "3.11"
os: "ubuntu-latest"
experimental: true

env:
PYTHON_VERSION: ${{ matrix.python-version }}
OS: ${{ matrix.os }}
UNSTABLE: ${{ matrix.experimental }}
ACTIONS_ALLOW_UNSECURE_COMMANDS: true

steps:
- name: Checkout source
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4

- name: Setup Conda Environment
uses: conda-incubator/setup-miniconda@v2
with:
miniconda-version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
mamba-version: "*"
channels: conda-forge,defaults
environment-file: continuous_integration/environment.yaml
activate-environment: test-environment

- name: Install unstable dependencies
if: matrix.experimental == true
shell: bash -l {0}
run: |
pip install -U pytest pytest-cov trollsift netifaces watchdog posttroll pyyaml pyinotify pyresample \
pytroll-schedule s3fs fsspec python-dateutil paramiko
python -m pip install \
--no-deps --pre --upgrade \
numpy; \
python -m pip install \
--no-deps --upgrade \
git+https://github.com/pytroll/posttroll \
git+https://github.com/pytroll/pytroll-schedule \
git+https://github.com/pytroll/trollsift;
- name: Install pytroll-collectors
run: |
pip install --no-deps -e .
- name: Run tests
- name: Run unit tests
shell: bash -l {0}
run: |
pytest --cov=pytroll_collectors pytroll_collectors/tests --cov-report=xml
- name: Upload coverage to Codecov
- name: Upload unittest coverage to Codecov
uses: codecov/codecov-action@v3
with:
flags: unittests
file: ./coverage.xml
env_vars: PYTHON_VERSION
env_vars: OS,PYTHON_VERSION,UNSTABLE
fail_ci_if_error: false

- name: Coveralls Parallel
uses: AndreMiras/coveralls-python-action@develop
with:
flag-name: run-${{ matrix.test_number }}
parallel: true
if: runner.os == 'Linux'

coveralls:
needs: [test]
runs-on: ubuntu-latest
steps:
- name: Coveralls Finished
uses: AndreMiras/coveralls-python-action@develop
with:
parallel-finished: true
31 changes: 5 additions & 26 deletions bin/s3stalker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2020 Martin Raspaud
# Copyright (c) 2020, 2023 Martin Raspaud

# Author(s):

Expand All @@ -27,38 +27,17 @@
with a cronjob.
"""

import argparse
import logging.config

import yaml

from pytroll_collectors.helper_functions import read_yaml
from pytroll_collectors.s3stalker import publish_new_files


def arg_parse():
"""Handle input arguments."""
parser = argparse.ArgumentParser()
parser.add_argument("bucket", help="The bucket to retrieve from.")
parser.add_argument("config", help="Config file to be used")
parser.add_argument("-l", "--log",
help="Log configuration file",
default=None)

return parser.parse_args()
from pytroll_collectors.s3stalker import publish_new_files, get_configs_from_command_line


def main():
"""Stalk an s3 bucket."""
args = arg_parse()

bucket = args.bucket
config = read_yaml(args.config)
bucket, config, log_config = get_configs_from_command_line()

if args.log is not None:
with open(args.log) as fd:
log_dict = yaml.safe_load(fd.read())
logging.config.dictConfig(log_dict)
if log_config:
logging.config.dictConfig(log_config)

try:
publish_new_files(bucket, config)
Expand Down
73 changes: 73 additions & 0 deletions bin/s3stalker_daemon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2022 Pytroll developers

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""S3stalker daemon/runner.
This is a daemon supposed to stay up and running "forever". It will regularly
fetch fresh filenames from an S3 object store and publish the urls of those new
filenames. It is a daemon version of the s3stalker.py script which needs to be
run as a cronjob.
"""

import argparse
import logging
import logging.config
import sys

from pytroll_collectors.s3stalker import get_configs_from_command_line
from pytroll_collectors.s3stalker_daemon_runner import S3StalkerRunner

logger = logging.getLogger(__name__)


def arg_parse():
"""Handle input arguments."""
parser = argparse.ArgumentParser()
parser.add_argument("bucket", help="The bucket to retrieve from.")
parser.add_argument("config", help="Config file to be used")
parser.add_argument("-l", "--log",
help="Log configuration file",
default=None)

return parser.parse_args()


def main():
"""Stalk an s3 bucket."""
bucket, config, log_config = get_configs_from_command_line()

if log_config:
logging.config.dictConfig(log_config)

logger.info("Try start the s3-stalker runner:")
try:
s3runner = S3StalkerRunner(bucket, config)
s3runner.start()
s3runner.join()
except KeyboardInterrupt:
logger.debug("Interrupting")
except Exception as err:
logger.error('The S3 Stalker Runner crashed: %s', str(err))
sys.exit(1)
finally:
s3runner.close()


if __name__ == '__main__':
main()
38 changes: 38 additions & 0 deletions continuous_integration/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
name: test-environment
channels:
- conda-forge
dependencies:
- distributed
- toolz
- sphinx
- pyyaml
- pyproj
- coveralls
- coverage
- codecov
- behave
- mock
- numpy
- pyresample
- pyorbital
- pycrs
- cartopy
- geopy
- freezegun
- paramiko
- responses
- netifaces
- watchdog
- s3fs
- pyinotify
- requests
- openssl
- pytz
- python-dateutil
- pytest
- pytest-cov
- pip
- pip:
- trollsift
- posttroll
- pytroll-schedule
20 changes: 20 additions & 0 deletions doc/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,26 @@ trollstalker2
New, alternative implementation of trollstalker. Not really needed,
as trollstalker works fine and is actively maintained.


s3stalker
^^^^^^^^^

A counterpart to trollstalker for polling for new files on an s3 bucket.
This is thought to be run regularly from eg. cron. For a daemon version of
this, check the next item.
Example configuration:
https://github.com/pytroll/pytroll-collectors/blob/main/examples/s3stalker.yaml

s3stalker_daemon
^^^^^^^^^^^^^^^^

The daemon version of s3stalker, that stays on and polls until stopped
(preferably with a SIGTERM).
Example configuration:
https://github.com/pytroll/pytroll-collectors/blob/main/examples/s3stalker_runner.yaml_template

See also https://s3fs.readthedocs.io/en/latest/#credentials on options how to define the S3 credentials.

zipcollector_runner
^^^^^^^^^^^^^^^^^^^

Expand Down
22 changes: 17 additions & 5 deletions examples/geographic_gatherer_config.ini_template
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ sensor = viirs
timeliness = 10
# duration of a granule in SECONDS
duration = 180
publish_topic =
publish_topic =
# The topics to listen for:
topics = /viirs/sdr/1

[ears_viirs]
pattern = /data/prod/satellit/ears/viirs/SVMC_{platform}_d{start_date:%Y%m%d}_t{start_time:%H%M%S%f}_e{end_time:%H%M%S%f}_b{orbit_number:5d}_c{proctime:%Y%m%d%H%M%S%f}_eum_ops.h5.bz2
Expand All @@ -32,7 +34,9 @@ sensor = viirs
timeliness = 30
duration = 85.4
variant = EARS
publish_topic =
publish_topic =
# The topics to listen for:
topics = /ears/viirs/sdr/1

[ears_noaa18_avhrr]
pattern = /data/prod/satellit/ears/avhrr/avhrr_{start_time:%Y%m%d_%H%M%S}_noaa18.hrp.bz2
Expand All @@ -44,7 +48,8 @@ platform_name = NOAA-18
sensor = avhrr/3
timeliness = 15
variant = EARS
publish_topic =
publish_topic =
topics = /ears/avhrr/hrpt/1

[ears_noaa19_avhrr]
pattern = /data/prod/satellit/ears/avhrr/avhrr_{start_time:%Y%m%d_%H%M%S}_noaa18.hrp.bz2
Expand All @@ -56,7 +61,8 @@ platform_name = NOAA-18
sensor = avhrr/3
timeliness = 15
variant = EARS
publish_topic =
publish_topic =
topics = /ears/avhrr/hrpt/1

[ears_metop-b]
pattern = /data/prod/satellit/ears/avhrr/AVHR_HRP_{data_processing_level:2s}_M01_{start_time:%Y%m%d%H%M%S}Z_{end_time:%Y%m%d%H%M%S}Z_N_O_{proc_time:%Y%m%d%H%M%S}Z.bz2
Expand All @@ -67,7 +73,8 @@ sensor = avhrr/3
timeliness = 15
level = 0
variant = EARS
publish_topic =
publish_topic =
topics = /ears/avhrr/metop/eps/1

[ears_metop-a]
pattern = /data/prod/satellit/ears/avhrr/AVHR_HRP_{data_processing_level:2s}_M02_{start_time:%Y%m%d%H%M%S}Z_{end_time:%Y%m%d%H%M%S}Z_N_O_{proc_time:%Y%m%d%H%M%S}Z.bz2
Expand All @@ -79,6 +86,7 @@ timeliness = 15
level = 0
variant = EARS
publish_topic = /EARS/Metop-B
topics = /ears/avhrr/metop/eps/1

[gds_metop-b]
pattern = /data/prod/satellit/metop2/AVHR_xxx_{data_processing_level:2s}_M01_{start_time:%Y%m%d%H%M%S}Z_{end_time:%Y%m%d%H%M%S}Z_N_O_{proc_time:%Y%m%d%H%M%S}Z
Expand All @@ -90,6 +98,7 @@ timeliness = 100
variant = GDS
orbit_type = polar
publish_topic = /GDS/Metop-B
topics = /gds/avhrr/metop/eps/1

[gds_metop-a]
pattern = /data/prod/satellit/metop2/AVHR_xxx_{level:2s}_M02_{start_time:%Y%m%d%H%M%S}Z_{end_time:%Y%m%d%H%M%S}Z_N_O_{proc_time:%Y%m%d%H%M%S}Z
Expand All @@ -100,6 +109,7 @@ sensor = avhrr/3
timeliness = 100
variant = GDS
publish_topic = /GDS/Metop-A
topics = /gds/avhrr/metop/eps/1

[EARS_terra]
pattern = /data/prod/satellit/modis/lvl1/thin_MOD021KM.A{start_time:%Y%j.%H%M}.005.{proc_time:%Y%j%H%M%S}.NRT.hdf
Expand All @@ -111,6 +121,7 @@ sensor = modis
timeliness = 180
duration = 300
variant = EARS
topics = /ears/modis/hdf4/1

[EARS_aqua]
pattern = /data/prod/satellit/modis/lvl1/thin_MYD021KM.A{start_time:%Y%j.%H%M}.005.{proc_time:%Y%j%H%M%S}.NRT.hdf
Expand All @@ -122,3 +133,4 @@ sensor = modis
timeliness = 180
duration = 300
variant = EARS
topics = /ears/modis/hdf4/1
17 changes: 17 additions & 0 deletions examples/s3stalker_log.yaml_template
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
version: 1
disable_existing_loggers: false
formatters:
pytroll:
format: '[%(asctime)s %(levelname)-8s %(name)s] %(message)s'
handlers:
console:
class: logging.StreamHandler
level: DEBUG
formatter: pytroll
stream: ext://sys.stdout
loggers:
posttroll:
level: INFO
root:
level: DEBUG
handlers: [console]
Loading

0 comments on commit 5b40ded

Please sign in to comment.