Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use pytest-xdist #440

Merged
merged 2 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Unreleased

- Using pytest-xdist for faster local tests
[PR #440](https://github.com/aai-institute/pyDVL/pull/440)
- Implementation of Data-OOB by @BastienZim
[PR #426](https://github.com/aai-institute/pyDVL/pull/426),
[PR #431](https://github.com/aai-institute/pyDVL/pull/431)
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ pytest-docker==2.0.0
pytest-mock
pytest-timeout
pytest-lazy-fixture
pytest-xdist>=3.3.1
wheel
twine==4.0.2
20 changes: 16 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ def seed_numpy(seed=42):
np.random.seed(seed)


@pytest.fixture(scope="session")
def num_workers():
# Run with 2 CPUs inside GitHub actions
if os.getenv("CI"):
Expand All @@ -205,9 +204,22 @@ def num_workers():
return max(1, min(available_cpus() - 1, 4))


@pytest.fixture
def n_jobs(num_workers):
return num_workers
@pytest.fixture(scope="session")
def n_jobs():
    """Session-scoped fixture exposing the shared worker count as ``n_jobs``."""
    jobs = num_workers()
    return jobs


def pytest_xdist_auto_num_workers(config) -> Optional[int]:
    """pytest-xdist hook: pick the process count when ``-n auto`` is requested.

    One CPU is left free for the main process; the remaining CPUs are divided
    by the per-test worker count so nested parallelism does not oversubscribe.
    Returns ``None`` unless ``-n auto`` was given, leaving the decision to
    pytest-xdist's default behavior.
    """
    if config.option.numprocesses != "auto":
        return None
    spare_cpus = available_cpus() - 1
    return max(1, spare_cpus // num_workers())


################################################################################
Expand Down
13 changes: 6 additions & 7 deletions tests/utils/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@

from pydvl.parallel.config import ParallelConfig

from ..conftest import num_workers


@pytest.fixture(scope="module", params=["joblib", "ray-local", "ray-external"])
def parallel_config(request, num_workers):
def parallel_config(request):
if request.param == "joblib":
yield ParallelConfig(backend="joblib", n_cpus_local=num_workers)
yield ParallelConfig(backend="joblib", n_cpus_local=num_workers())
elif request.param == "ray-local":
try:
import ray
except ImportError:
pytest.skip("Ray not installed.")
yield ParallelConfig(backend="ray", n_cpus_local=num_workers)
yield ParallelConfig(backend="ray", n_cpus_local=num_workers())
ray.shutdown()
elif request.param == "ray-external":
try:
Expand All @@ -22,10 +24,7 @@ def parallel_config(request, num_workers):
pytest.skip("Ray not installed.")
# Starts a head-node for the cluster.
cluster = Cluster(
initialize_head=True,
head_node_args={
"num_cpus": num_workers,
},
initialize_head=True, head_node_args={"num_cpus": num_workers()}
)
yield ParallelConfig(backend="ray", address=cluster.address)
ray.shutdown()
Expand Down
24 changes: 13 additions & 11 deletions tests/utils/test_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,17 @@
from pydvl.parallel.futures import init_executor
from pydvl.utils.types import Seed

from ..conftest import num_workers

def test_effective_n_jobs(parallel_config, num_workers):

def test_effective_n_jobs(parallel_config):
parallel_backend = init_parallel_backend(parallel_config)
assert parallel_backend.effective_n_jobs(1) == 1
assert parallel_backend.effective_n_jobs(4) == min(4, num_workers)
assert parallel_backend.effective_n_jobs(4) == min(4, num_workers())
if parallel_config.address is None:
assert parallel_backend.effective_n_jobs(-1) == num_workers
assert parallel_backend.effective_n_jobs(-1) == num_workers()
else:
assert parallel_backend.effective_n_jobs(-1) == num_workers
assert parallel_backend.effective_n_jobs(-1) == num_workers()

for n_jobs in [-1, 1, 2]:
assert parallel_backend.effective_n_jobs(n_jobs) == effective_n_jobs(
Expand Down Expand Up @@ -166,7 +168,7 @@ def test_map_reduce_seeding(parallel_config, seed_1, seed_2, op):
assert op(result_1, result_2)


def test_wrap_function(parallel_config, num_workers):
def test_wrap_function(parallel_config):
if parallel_config.backend != "ray":
pytest.skip("Only makes sense for ray")

Expand All @@ -188,8 +190,8 @@ def get_pid():
return os.getpid()

wrapped_func = parallel_backend.wrap(get_pid, num_cpus=1)
pids = parallel_backend.get([wrapped_func() for _ in range(num_workers)])
assert len(set(pids)) == num_workers
pids = parallel_backend.get([wrapped_func() for _ in range(num_workers())])
assert len(set(pids)) == num_workers()


def test_futures_executor_submit(parallel_config):
Expand All @@ -205,7 +207,7 @@ def test_futures_executor_map(parallel_config):
assert results == [1, 2, 3]


def test_futures_executor_map_with_max_workers(parallel_config, num_workers):
def test_futures_executor_map_with_max_workers(parallel_config):
if parallel_config.backend != "ray":
pytest.skip("Currently this test only works with Ray")

Expand All @@ -215,12 +217,12 @@ def func(_):

start_time = time.monotonic()
with init_executor(config=parallel_config) as executor:
assert executor._max_workers == num_workers
assert executor._max_workers == num_workers()
list(executor.map(func, range(3)))
end_time = time.monotonic()
total_time = end_time - start_time
# We expect the time difference to be > 3 / num_workers, but has to be at least 1
assert total_time > max(1.0, 3 / num_workers)
# We expect the time difference to be > 3 / num_workers(), but has to be at least 1
assert total_time > max(1.0, 3 / num_workers())


def test_future_cancellation(parallel_config):
Expand Down
5 changes: 3 additions & 2 deletions tests/value/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pydvl.utils.status import Status
from pydvl.value import ValuationResult

from ..conftest import num_workers
from . import polynomial


Expand Down Expand Up @@ -122,5 +123,5 @@ def linear_shapley(linear_dataset, scorer, n_jobs):


@pytest.fixture(scope="module")
def parallel_config():
    """Module-scoped joblib ``ParallelConfig`` built from the shared worker count."""
    config = ParallelConfig(
        backend="joblib",
        n_cpus_local=num_workers(),
        wait_timeout=0.1,
    )
    yield config
6 changes: 3 additions & 3 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ setenv =
[testenv:base]
description = Tests base modules
commands =
pytest --cov "{envsitepackagesdir}/pydvl" -m "not torch" {posargs}
pytest -n auto --cov "{envsitepackagesdir}/pydvl" -m "not torch" {posargs}

[testenv:torch]
description = Tests modules that rely on pytorch
commands =
pytest --cov "{envsitepackagesdir}/pydvl" -m torch {posargs}
pytest -n auto --cov "{envsitepackagesdir}/pydvl" -m torch {posargs}
extras =
influence

Expand All @@ -26,7 +26,7 @@ description = Tests notebooks
setenv =
PYTHONPATH={toxinidir}/notebooks
commands =
pytest notebooks/ --cov "{envsitepackagesdir}/pydvl"
pytest -n auto notebooks/ --cov "{envsitepackagesdir}/pydvl" {posargs}
deps =
{[testenv]deps}
jupyter==1.0.0
Expand Down