diff --git a/CHANGELOG.md b/CHANGELOG.md index abea5f5ac..790815955 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ - Fixed missing move of tensors to model device in `EkfacInfluence` implementation [PR #570](https://github.com/aai-institute/pyDVL/pull/570) +### Added + +- Add a device fixture for `pytest`, which, depending on availability and + user input (`pytest --with-cuda`), resolves to the cuda device + [PR #574](https://github.com/aai-institute/pyDVL/pull/574) + ## 0.9.1 - Bug fixes, logging improvement ### Fixed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 56d8ead7b..ecd1288de 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -131,6 +131,13 @@ There are a few important arguments: - `--slow-tests` enables running slow tests. See below for a description of slow tests. +- `--with-cuda` sets the device fixture in [tests/influence/torch/conftest.py]( + tests/influence/torch/conftest.py) to `cuda` if it is available. + Using this fixture within tests, you can run parts of your tests on a `cuda` + device. Be aware that you still have to take care of the usage of the device + manually in a specific test. Setting this flag does not result in + running all tests on a GPU. + ### Markers We use a few different markers to differentiate between tests and runs diff --git a/tests/conftest.py b/tests/conftest.py index b08f09377..d8594c314 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,6 +48,12 @@ def pytest_addoption(parser): default=False, help="Disable reporting. 
Verbose mode takes precedence.", ) + parser.addoption( + "--with-cuda", + action="store_true", + default=False, + help="Set device fixture to 'cuda' if available", + ) @pytest.fixture diff --git a/tests/influence/torch/conftest.py b/tests/influence/torch/conftest.py index b16a2d856..37459f1cc 100644 --- a/tests/influence/torch/conftest.py +++ b/tests/influence/torch/conftest.py @@ -1,5 +1,6 @@ from typing import Tuple +import pytest import torch from numpy.typing import NDArray from torch.optim import LBFGS @@ -59,3 +60,14 @@ def closure(): def torch_linear_model_to_numpy(model: torch.nn.Linear) -> Tuple[NDArray, NDArray]: model.eval() return model.weight.data.numpy(), model.bias.data.numpy() + + +@pytest.fixture(scope="session") +def device(request): + import torch + + use_cuda = request.config.getoption("--with-cuda") + if use_cuda and torch.cuda.is_available(): + return torch.device("cuda") + else: + return torch.device("cpu") diff --git a/tests/influence/torch/test_influence_model.py b/tests/influence/torch/test_influence_model.py index 0631c60fc..d2203a84e 100644 --- a/tests/influence/torch/test_influence_model.py +++ b/tests/influence/torch/test_influence_model.py @@ -340,6 +340,7 @@ def test_influence_linear_model( rtol, mode: InfluenceMode, train_set_size: int, + device: torch.device, hessian_reg: float = 0.1, test_set_size: int = 20, problem_dimension: Tuple[int, int] = (4, 20), @@ -373,16 +374,20 @@ def test_influence_linear_model( train_data_set = TensorDataset(*list(map(torch.from_numpy, train_data))) train_data_loader = DataLoader(train_data_set, batch_size=40, num_workers=0) - influence = influence_factory(linear_layer, loss, train_data_loader, hessian_reg) + influence = influence_factory( + linear_layer.to(device), loss, train_data_loader, hessian_reg + ) x_train, y_train = tuple(map(torch.from_numpy, train_data)) x_test, y_test = tuple(map(torch.from_numpy, test_data)) - influence_values = influence.influences( - x_test, y_test, x_train, y_train, 
mode=mode - ).numpy() - sym_influence_values = influence.influences( - x_train, y_train, x_train, y_train, mode=mode - ).numpy() + influence_values = ( + influence.influences(x_test, y_test, x_train, y_train, mode=mode).cpu().numpy() + ) + sym_influence_values = ( + influence.influences(x_train, y_train, x_train, y_train, mode=mode) + .cpu() + .numpy() + ) with pytest.raises(ValueError): influence.influences(x_test, y_test, x=x_train, mode=mode) @@ -431,6 +436,7 @@ def test_influences_lissa( ], direct_influences, influence_factory, + device, ): model, loss, x_train, y_train, x_test, y_test = model_and_data @@ -438,11 +444,15 @@ def test_influences_lissa( TensorDataset(x_train, y_train), batch_size=test_case.batch_size ) influence_model = influence_factory( - model, loss, train_dataloader, test_case.hessian_reg + model.to(device), loss, train_dataloader, test_case.hessian_reg + ) + approx_influences = ( + influence_model.influences( + x_test, y_test, x_train, y_train, mode=test_case.mode + ) + .cpu() + .numpy() ) - approx_influences = influence_model.influences( - x_test, y_test, x_train, y_train, mode=test_case.mode - ).numpy() assert not np.any(np.isnan(approx_influences)) @@ -497,9 +507,10 @@ def test_influences_low_rank( direct_sym_influences, direct_factors, influence_factory, + device: torch.device, ): - atol = 1e-8 - rtol = 1e-5 + atol = 1e-7 + rtol = 1e-4 model, loss, x_train, y_train, x_test, y_test = model_and_data num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) @@ -509,7 +520,7 @@ def test_influences_low_rank( ) influence_func_model = influence_factory( - model, + model.to(device), loss, test_case.hessian_reg, num_parameters - 1, @@ -525,33 +536,47 @@ def test_influences_low_rank( influence_func_model = influence_func_model.fit(train_dataloader) - low_rank_influence = influence_func_model.influences( - x_test, y_test, x_train, y_train, mode=test_case.mode - ).numpy() + low_rank_influence = ( + 
influence_func_model.influences( + x_test, y_test, x_train, y_train, mode=test_case.mode + ) + .cpu() + .numpy() + ) - sym_low_rank_influence = influence_func_model.influences( - x_train, y_train, mode=test_case.mode - ).numpy() + sym_low_rank_influence = ( + influence_func_model.influences(x_train, y_train, mode=test_case.mode) + .cpu() + .numpy() + ) low_rank_factors = influence_func_model.influence_factors(x_test, y_test) assert np.allclose( direct_factors, - influence_func_model.influence_factors(x_train, y_train).numpy(), + influence_func_model.influence_factors(x_train, y_train).cpu().numpy(), atol=atol, rtol=rtol, ) if test_case.mode is InfluenceMode.Up: - low_rank_influence_transpose = influence_func_model.influences( - x_train, y_train, x_test, y_test, mode=test_case.mode - ).numpy() + low_rank_influence_transpose = ( + influence_func_model.influences( + x_train, y_train, x_test, y_test, mode=test_case.mode + ) + .cpu() + .numpy() + ) assert np.allclose( low_rank_influence_transpose, low_rank_influence.swapaxes(0, 1) ) - low_rank_values_from_factors = influence_func_model.influences_from_factors( - low_rank_factors, x_train, y_train, mode=test_case.mode - ).numpy() + low_rank_values_from_factors = ( + influence_func_model.influences_from_factors( + low_rank_factors, x_train, y_train, mode=test_case.mode + ) + .cpu() + .numpy() + ) assert np.allclose(direct_influences, low_rank_influence, atol=atol, rtol=rtol) assert np.allclose( direct_sym_influences, sym_low_rank_influence, atol=atol, rtol=rtol @@ -578,6 +603,7 @@ def test_influences_ekfac( ], direct_influences, direct_sym_influences, + device: torch.device, ): model, loss, x_train, y_train, x_test, y_test = model_and_data @@ -589,7 +615,7 @@ def test_influences_ekfac( model, update_diagonal=True, hessian_regularization=test_case.hessian_reg, - ) + ).to(device) with pytest.raises(NotFittedException): ekfac_influence.influences( @@ -604,9 +630,13 @@ def test_influences_ekfac( 
ekfac_influence.fit(train_dataloader) elif isinstance(loss, nn.CrossEntropyLoss): ekfac_influence = ekfac_influence.fit(train_dataloader) - ekfac_influence_values = ekfac_influence.influences( - x_test, y_test, x_train, y_train, mode=test_case.mode - ).numpy() + ekfac_influence_values = ( + ekfac_influence.influences( + x_test, y_test, x_train, y_train, mode=test_case.mode + ) + .cpu() + .numpy() + ) ekfac_influences_by_layer = ekfac_influence.influences_by_layer( x_test, y_test, x_train, y_train, mode=test_case.mode @@ -614,22 +644,32 @@ def test_influences_ekfac( accumulated_inf_by_layer = np.zeros_like(ekfac_influence_values) for layer, infl in ekfac_influences_by_layer.items(): - accumulated_inf_by_layer += infl.detach().numpy() + accumulated_inf_by_layer += infl.detach().cpu().numpy() - ekfac_self_influence = ekfac_influence.influences( - x_train, y_train, mode=test_case.mode - ).numpy() + ekfac_self_influence = ( + ekfac_influence.influences(x_train, y_train, mode=test_case.mode) + .cpu() + .numpy() + ) ekfac_factors = ekfac_influence.influence_factors(x_test, y_test) - influence_from_factors = ekfac_influence.influences_from_factors( - ekfac_factors, x_train, y_train, mode=test_case.mode - ).numpy() + influence_from_factors = ( + ekfac_influence.influences_from_factors( + ekfac_factors, x_train, y_train, mode=test_case.mode + ) + .cpu() + .numpy() + ) assert np.allclose(ekfac_influence_values, influence_from_factors) assert np.allclose(ekfac_influence_values, accumulated_inf_by_layer) - check_influence_correlations(direct_influences, ekfac_influence_values) - check_influence_correlations(direct_sym_influences, ekfac_self_influence) + check_influence_correlations( + direct_influences, ekfac_influence_values, threshold=0.94 + ) + check_influence_correlations( + direct_sym_influences, ekfac_self_influence, threshold=0.94 + ) @pytest.mark.torch @@ -656,6 +696,7 @@ def test_influences_cg( direct_factors, use_block_cg: bool, pre_conditioner: PreConditioner, + 
device: torch.device, ): model, loss, x_train, y_train, x_test, y_test = model_and_data @@ -663,7 +704,7 @@ def test_influences_cg( TensorDataset(x_train, y_train), batch_size=test_case.batch_size ) influence_model = CgInfluence( - model, + model.to(device), loss, test_case.hessian_reg, maxiter=5, @@ -672,9 +713,13 @@ def test_influences_cg( ) influence_model = influence_model.fit(train_dataloader) - approx_influences = influence_model.influences( - x_test, y_test, x_train, y_train, mode=test_case.mode - ).numpy() + approx_influences = ( + influence_model.influences( + x_test, y_test, x_train, y_train, mode=test_case.mode + ) + .cpu() + .numpy() + ) assert not np.any(np.isnan(approx_influences)) @@ -701,7 +746,11 @@ def test_influences_cg( # check that block variant returns the correct vector, if only one right hand side # is provided if use_block_cg: - single_influence = influence_model.influence_factors( - x_train[0].unsqueeze(0), y_train[0].unsqueeze(0) - ).numpy() + single_influence = ( + influence_model.influence_factors( + x_train[0].unsqueeze(0), y_train[0].unsqueeze(0) + ) + .cpu() + .numpy() + ) assert np.allclose(single_influence, direct_factors[0], atol=1e-6, rtol=1e-4)