From e833232f3943bba32040e442f3f15c8ae1a0a85f Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto
Date: Mon, 30 Dec 2024 17:32:18 -0500
Subject: [PATCH] Add --sdmx-fetch-data; use in GHA workflow

---
 .github/workflows/pytest.yaml |  1 +
 .pre-commit-config.yaml       |  1 +
 pyproject.toml                |  1 +
 sdmx/testing/__init__.py      | 80 +++++++++++++++++++++++++++++++++--
 4 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
index ca8d2a158..9a9ea28cf 100644
--- a/.github/workflows/pytest.yaml
+++ b/.github/workflows/pytest.yaml
@@ -51,6 +51,7 @@ jobs:
         uv run --no-sync \
           pytest \
           -ra --color=yes --verbose \
+          --sdmx-fetch-data \
           --cov-report=xml \
           --numprocesses auto
       shell: bash
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 162344ef8..4a6335937 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,6 +4,7 @@ repos:
   hooks:
   - id: mypy
     additional_dependencies:
+    - GitPython
     - lxml-stubs
     - pandas-stubs
     - pytest
diff --git a/pyproject.toml b/pyproject.toml
index 048d81bc0..6a7bd87c6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ dependencies = [
 cache = ["requests-cache"]
 docs = ["furo", "IPython", "sphinx >= 8"]
 tests = [
+  "GitPython",
   "Jinja2",
   "pytest >= 5",
   "pytest-cov",
diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py
index 2ac594d85..596ce5e70 100644
--- a/sdmx/testing/__init__.py
+++ b/sdmx/testing/__init__.py
@@ -7,6 +7,7 @@
 
 import numpy as np
 import pandas as pd
+import platformdirs
 import pytest
 import responses
 
@@ -17,6 +18,9 @@
 
 log = logging.getLogger(__name__)
 
+DATA_DEFAULT_DIR = platformdirs.user_cache_path("sdmx").joinpath("test-data")
+# DATA_REMOTE_URL = "git@github.com:khaeru/sdmx-test-data.git"
+DATA_REMOTE_URL = "https://github.com/khaeru/sdmx-test-data.git"
 
 # Expected to_pandas() results for data files; see expected_data()
 # - Keys are the file name (above) with '.' -> '-': 'foo.xml' -> 'foo-xml'
@@ -49,12 +53,74 @@ def assert_pd_equal(left, right, **kwargs):
     method(left, right, **kwargs)
 
 
+def fetch_data() -> Path:
+    """Fetch test data from GitHub.
+
+    The 'main' branch of ``DATA_REMOTE_URL`` is fetched (latest commit only) into
+    a git repository in ``DATA_DEFAULT_DIR`` and, if needed, checked out.
+
+    Returns
+    -------
+    Path
+        Working directory of the local repository.
+    """
+    import git
+
+    # Create a lock to avoid concurrency issues when running with pytest-xdist
+    DATA_DEFAULT_DIR.mkdir(parents=True, exist_ok=True)
+    blf = git.BlockingLockFile(DATA_DEFAULT_DIR, check_interval_s=0.1)
+    blf._obtain_lock()
+
+    try:
+        # Initialize a git Repo object
+        repo = git.Repo.init(DATA_DEFAULT_DIR)
+
+        try:
+            # Reference to existing 'origin' remote
+            origin = repo.remotes["origin"]
+            # Ensure the DATA_REMOTE_URL is among the URLs for this remote
+            if DATA_REMOTE_URL not in origin.urls:
+                origin.set_url(DATA_REMOTE_URL)
+        except IndexError:
+            # Create a new remote
+            origin = repo.create_remote("origin", DATA_REMOTE_URL)
+
+        log.info(f"Fetch test data from {origin} → {repo.working_dir}")
+
+        # Fetch only 1 commit from the remote
+        origin.fetch("refs/heads/main", depth=1)
+        origin_main = origin.refs["main"]  # Reference to 'origin/main'
+        try:
+            head = repo.heads["main"]  # Reference to existing local 'main'
+        except IndexError:
+            head = repo.create_head("main", origin_main)  # Create a local 'main'
+
+        if (
+            head.commit != origin_main.commit  # Commit differs
+            or repo.is_dirty()  # Working dir is dirty
+            or len(repo.index.diff(head.commit))
+        ):
+            # Check out files into the working directory
+            head.set_tracking_branch(origin_main).checkout()
+
+        return Path(repo.working_dir)
+    finally:
+        # Release the lock even if a git operation above raised, so that a stale lock
+        # file does not block other processes
+        blf._release_lock()
+
+
 def pytest_addoption(parser):
-    """Add the ``--sdmx-test-data`` command-line option to pytest."""
+    """Add pytest command-line options."""
+    parser.addoption(
+        "--sdmx-fetch-data",
+        action="store_true",
+        help="fetch test specimens from GitHub",
+    )
     parser.addoption(
         "--sdmx-test-data",
         # Use the environment variable value by default
-        default=os.environ.get("SDMX_TEST_DATA", None),
+        default=os.environ.get("SDMX_TEST_DATA", DATA_DEFAULT_DIR),
         help="path to SDMX test specimens",
     )
 
@@ -70,8 +136,16 @@ def pytest_configure(config):
     config._sdmx_reporter = ServiceReporter(config)
     config.pluginmanager.register(config._sdmx_reporter)
 
+    # Optionally clone the test data
+    if config.option.sdmx_fetch_data:
+        config.option.sdmx_test_data = fetch_data()
+
     # Check the value can be converted to a path, and exists
-    message = "Give --sdmx-test-data=… or set the SDMX_TEST_DATA environment variable"
+    message = (
+        "Unable to locate test specimens. Give --sdmx-fetch-data, or use "
+        "--sdmx-test-data=… or the SDMX_TEST_DATA environment variable to indicate an "
+        "existing directory"
+    )
     try:
         sdmx_test_data = Path(config.option.sdmx_test_data)
     except TypeError:  # pragma: no cover