forked from dr-leo/pandaSDMX
-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add --sdmx-fetch-data; use in GHA workflow
- Loading branch information
Showing
4 changed files
with
67 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ repos: | |
hooks: | ||
- id: mypy | ||
additional_dependencies: | ||
- GitPython | ||
- lxml-stubs | ||
- pandas-stubs | ||
- pytest | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
|
||
import numpy as np | ||
import pandas as pd | ||
import platformdirs | ||
import pytest | ||
import responses | ||
|
||
|
@@ -17,6 +18,9 @@ | |
|
||
log = logging.getLogger(__name__) | ||
|
||
DATA_DEFAULT_DIR = platformdirs.user_cache_path("sdmx").joinpath("test-data") | ||
# DATA_REMOTE_URL = "git@github.com:khaeru/sdmx-test-data.git" | ||
DATA_REMOTE_URL = "https://github.com/khaeru/sdmx-test-data.git" | ||
|
||
# Expected to_pandas() results for data files; see expected_data() | ||
# - Keys are the file name (above) with '.' -> '-': 'foo.xml' -> 'foo-xml' | ||
|
@@ -49,12 +53,61 @@ def assert_pd_equal(left, right, **kwargs): | |
method(left, right, **kwargs) | ||
|
||
|
||
def fetch_data() -> Path:
    """Fetch test data from GitHub.

    Initialize (or re-use) a git repository in :data:`DATA_DEFAULT_DIR`, make sure
    its ``origin`` remote points at :data:`DATA_REMOTE_URL`, fetch the tip of the
    remote ``main`` branch, and check it out if the local state differs.

    The whole operation runs while holding a blocking lock file, so that
    concurrent callers (e.g. pytest-xdist workers) do not modify the repository at
    the same time. The lock is released even if a git operation fails.

    Returns
    -------
    Path
        The working directory of the repository containing the test data.
    """
    import git

    DATA_DEFAULT_DIR.mkdir(parents=True, exist_ok=True)

    # Create a lock to avoid concurrency issues when running with pytest-xdist
    blf = git.BlockingLockFile(DATA_DEFAULT_DIR, check_interval_s=0.1)
    blf._obtain_lock()

    try:
        # Initialize a git Repo object
        repo = git.Repo.init(DATA_DEFAULT_DIR)

        try:
            # Reference to existing 'origin' remote
            origin = repo.remotes["origin"]
        except IndexError:
            # Create a new remote
            origin = repo.create_remote("origin", DATA_REMOTE_URL)
        else:
            # Ensure the DATA_REMOTE_URL is among the URLs for this remote
            if DATA_REMOTE_URL not in origin.urls:
                origin.set_url(DATA_REMOTE_URL)

        log.info(f"Fetch test data from {origin} → {repo.working_dir}")

        # Fetch only 1 commit from the remote
        origin.fetch("refs/heads/main", depth=1)
        origin_main = origin.refs["main"]  # Reference to 'origin/main'

        try:
            head = repo.heads["main"]  # Reference to existing local 'main'
        except IndexError:
            head = repo.create_head("main", origin_main)  # Create a local 'main'

        if (
            head.commit != origin_main.commit  # Commit differs
            or repo.is_dirty()  # Working dir is dirty
            or len(repo.index.diff(head.commit))  # Index differs from the commit
        ):
            # Check out files into the working directory
            head.set_tracking_branch(origin_main).checkout()

        return Path(repo.working_dir)
    finally:
        # Release the lock explicitly, including when a git operation raised;
        # relying on garbage collection of `blf` can leave other workers blocked
        blf._release_lock()
|
||
|
||
def pytest_addoption(parser):
    """Add pytest command-line options.

    ``--sdmx-fetch-data``
        Flag; when given, test specimens are fetched from GitHub.
    ``--sdmx-test-data``
        Path to SDMX test specimens. Defaults to the value of the
        ``SDMX_TEST_DATA`` environment variable if set, otherwise to
        :data:`DATA_DEFAULT_DIR`.

    Parameters
    ----------
    parser
        The pytest option parser to which the options are added.
    """
    parser.addoption(
        "--sdmx-fetch-data",
        action="store_true",
        help="fetch test specimens from GitHub",
    )
    parser.addoption(
        "--sdmx-test-data",
        # Use the environment variable value by default; fall back to the
        # default cache directory when it is not set
        default=os.environ.get("SDMX_TEST_DATA", DATA_DEFAULT_DIR),
        help="path to SDMX test specimens",
    )
|
||
|
@@ -70,8 +123,16 @@ def pytest_configure(config): | |
config._sdmx_reporter = ServiceReporter(config) | ||
config.pluginmanager.register(config._sdmx_reporter) | ||
|
||
# Optionally clone the test data | ||
if config.option.sdmx_fetch_data: | ||
config.option.sdmx_test_data = fetch_data() | ||
|
||
# Check the value can be converted to a path, and exists | ||
message = "Give --sdmx-test-data=… or set the SDMX_TEST_DATA environment variable" | ||
message = ( | ||
"Unable to locate test specimens. Give --sdmx-fetch-data, or use " | ||
"--sdmx-test-data=… or the SDMX_TEST_DATA environment variable to indicate an " | ||
"existing directory" | ||
) | ||
try: | ||
sdmx_test_data = Path(config.option.sdmx_test_data) | ||
except TypeError: # pragma: no cover | ||
|