Skip to content

Commit

Permalink
Add --sdmx-fetch-data; use in GHA workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
khaeru committed Dec 31, 2024
1 parent eb1d805 commit f3c7714
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 3 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pytest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ jobs:
uv run --no-sync \
pytest \
-ra --color=yes --verbose \
--sdmx-fetch-data \
--cov-report=xml \
--numprocesses auto
shell: bash
Expand Down
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ repos:
hooks:
- id: mypy
additional_dependencies:
- GitPython
- lxml-stubs
- pandas-stubs
- pytest
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [
cache = ["requests-cache"]
docs = ["furo", "IPython", "sphinx >= 8"]
tests = [
"GitPython",
"Jinja2",
"pytest >= 5",
"pytest-cov",
Expand Down
68 changes: 65 additions & 3 deletions sdmx/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import numpy as np
import pandas as pd
import platformdirs
import pytest
import responses

Expand All @@ -17,6 +18,9 @@

log = logging.getLogger(__name__)

# Default location of the test specimens: a "test-data" directory inside the
# per-user cache directory for "sdmx". fetch_data() clones into this directory.
DATA_DEFAULT_DIR = platformdirs.user_cache_path("sdmx") / "test-data"

# Remote repository that holds the test specimens (HTTPS form).
# SSH alternative: "git@github.com:khaeru/sdmx-test-data.git"
DATA_REMOTE_URL = "https://github.com/khaeru/sdmx-test-data.git"

# Expected to_pandas() results for data files; see expected_data()
# - Keys are the file name (above) with '.' -> '-': 'foo.xml' -> 'foo-xml'
Expand Down Expand Up @@ -49,12 +53,62 @@ def assert_pd_equal(left, right, **kwargs):
method(left, right, **kwargs)


def fetch_data() -> Path:
    """Fetch test data from GitHub.

    Initialize (or re-use) a git repository at :data:`DATA_DEFAULT_DIR`, point its
    ``origin`` remote at :data:`DATA_REMOTE_URL`, fetch a single commit of the remote
    ``main`` branch, and check it out if the local state differs.

    A lock file next to :data:`DATA_DEFAULT_DIR` serializes concurrent callers, for
    instance multiple pytest-xdist workers. The lock is always released, including
    when a git operation raises.

    Returns
    -------
    Path
        Working directory of the repository containing the test data.
    """
    import git

    lock_path = DATA_DEFAULT_DIR.with_name("test-data.lock")
    # The lock file lives in the parent of DATA_DEFAULT_DIR; make sure that directory
    # exists, otherwise the lock can never be obtained on a fresh machine
    lock_path.parent.mkdir(parents=True, exist_ok=True)

    # Create a lock to avoid concurrency issues when running with pytest-xdist
    blf = git.BlockingLockFile(lock_path, check_interval_s=0.1)
    blf._obtain_lock()

    try:
        # Initialize a git Repo object
        repo = git.Repo.init(DATA_DEFAULT_DIR)

        try:
            # Reference to existing 'origin' remote
            origin = repo.remotes["origin"]
        except IndexError:
            # Create a new remote
            origin = repo.create_remote("origin", DATA_REMOTE_URL)
        else:
            # Ensure the DATA_REMOTE_URL is among the URLs for this remote
            if DATA_REMOTE_URL not in origin.urls:
                origin.set_url(DATA_REMOTE_URL)

        log.info("Fetch test data from %s to %s", origin, repo.working_dir)

        origin.fetch("refs/heads/main", depth=1)  # Fetch only 1 commit from the remote
        origin_main = origin.refs["main"]  # Reference to 'origin/main'
        try:
            head = repo.heads["main"]  # Reference to existing local 'main'
        except IndexError:
            head = repo.create_head("main", origin_main)  # Create a local 'main'

        if (
            head.commit != origin_main.commit  # Commit differs
            or repo.is_dirty()  # Working dir is dirty
            or repo.index.diff(head.commit)  # Index differs from the local 'main'
        ):
            # Check out files into the working directory
            head.set_tracking_branch(origin_main).checkout()

        return Path(repo.working_dir)
    finally:
        # Release the lock explicitly rather than relying on garbage collection, so
        # other workers are not blocked if anything above raised
        blf._release_lock()


def pytest_addoption(parser):
    """Add pytest command-line options.

    Two options are registered on `parser`:

    ``--sdmx-fetch-data``
        Flag; when given, test specimens are fetched from GitHub.
    ``--sdmx-test-data``
        Path to a directory of SDMX test specimens. Defaults to the value of the
        ``SDMX_TEST_DATA`` environment variable or, if that is not set, to
        :data:`DATA_DEFAULT_DIR`.
    """
    parser.addoption(
        "--sdmx-fetch-data",
        action="store_true",
        help="fetch test specimens from GitHub",
    )
    parser.addoption(
        "--sdmx-test-data",
        # Use the environment variable value if set; otherwise the default directory
        default=os.environ.get("SDMX_TEST_DATA", DATA_DEFAULT_DIR),
        help="path to SDMX test specimens",
    )

Expand All @@ -70,8 +124,16 @@ def pytest_configure(config):
config._sdmx_reporter = ServiceReporter(config)
config.pluginmanager.register(config._sdmx_reporter)

# Optionally clone the test data
if config.option.sdmx_fetch_data:
config.option.sdmx_test_data = fetch_data()

# Check the value can be converted to a path, and exists
message = "Give --sdmx-test-data=… or set the SDMX_TEST_DATA environment variable"
message = (
"Unable to locate test specimens. Give --sdmx-fetch-data, or use "
"--sdmx-test-data=… or the SDMX_TEST_DATA environment variable to indicate an "
"existing directory"
)
try:
sdmx_test_data = Path(config.option.sdmx_test_data)
except TypeError: # pragma: no cover
Expand Down

0 comments on commit f3c7714

Please sign in to comment.