From b1e9749dcdf705e3cac60a974fd1cf2de5f9afd2 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 26 Jan 2024 21:05:01 -0500 Subject: [PATCH] set NUM_WORKERS to 0 Signed-off-by: Jinzhe Zeng --- .github/workflows/test_python.yml | 2 +- deepmd/pt/infer/inference.py | 5 ++++- deepmd/pt/train/training.py | 2 +- deepmd/pt/utils/dataloader.py | 2 +- source/tests/pt/test_sampler.py | 2 +- source/tests/pt/test_saveload_dpa1.py | 2 +- source/tests/pt/test_saveload_se_e2_a.py | 2 +- 7 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index c53e052ae5..091a2a61f8 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -41,7 +41,7 @@ jobs: - run: dp --version - run: pytest --cov=deepmd source/tests --durations=0 env: - NUM_WORKERS: 1 + NUM_WORKERS: 0 - uses: codecov/codecov-action@v3 with: gcov: true diff --git a/deepmd/pt/infer/inference.py b/deepmd/pt/infer/inference.py index 0e1e09665e..4906bb7a46 100644 --- a/deepmd/pt/infer/inference.py +++ b/deepmd/pt/infer/inference.py @@ -35,6 +35,7 @@ from deepmd.pt.utils.env import ( DEVICE, JIT, + NUM_WORKERS, ) if torch.__version__.startswith("2"): @@ -232,7 +233,9 @@ def run(self): dataset, sampler=sampler, batch_size=None, - num_workers=1, # setting to 0 diverges the behavior of its iterator; should be >=1 + num_workers=min( + NUM_WORKERS, 1 + ), # setting to 0 diverges the behavior of its iterator; should be >=1 drop_last=False, ) data = iter(dataloader) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index b8d437452b..049685a6e3 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -161,7 +161,7 @@ def get_data_loader(_training_data, _validation_data, _training_params): _validation_data, sampler=valid_sampler, batch_size=None, - num_workers=1, + num_workers=min(NUM_WORKERS, 1), drop_last=False, pin_memory=True, ) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index f1a30bc1bf..7c95f66c9c 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -314,6 +314,6 @@ def get_weighted_sampler(training_data, prob_style, sys_prob=False): probs = process_sys_probs(prob_style, training_data.index) logging.info("Generated weighted sampler with prob array: " + str(probs)) # training_data.total_batch is the size of one epoch, you can increase it to avoid too many rebuilding of iteraters - len_sampler = training_data.total_batch * env.NUM_WORKERS + len_sampler = training_data.total_batch * max(env.NUM_WORKERS, 1) sampler = WeightedRandomSampler(probs, len_sampler, replacement=True) return sampler diff --git a/source/tests/pt/test_sampler.py b/source/tests/pt/test_sampler.py index 26740b6640..0ff16ed7c7 100644 --- a/source/tests/pt/test_sampler.py +++ b/source/tests/pt/test_sampler.py @@ -65,7 +65,7 @@ def test_sampler_debug_info(self): self.my_dataset, sampler=get_weighted_sampler(self.my_dataset, prob_style="prob_sys_size"), batch_size=None, - num_workers=1, # setting to 0 diverges the behavior of its iterator; should be >=1 + num_workers=0, # setting to 0 diverges the behavior of its iterator; should be >=1 drop_last=False, pin_memory=True, ) diff --git a/source/tests/pt/test_saveload_dpa1.py b/source/tests/pt/test_saveload_dpa1.py index 3d0b5bd117..d1043f7029 100644 --- a/source/tests/pt/test_saveload_dpa1.py +++ b/source/tests/pt/test_saveload_dpa1.py @@ -79,7 +79,7 @@ def setUp(self): self.dataset, sampler=torch.utils.data.RandomSampler(self.dataset), batch_size=None, - num_workers=1, # setting to 0 diverges the behavior of its iterator; should be >=1 + num_workers=0, # setting to 0 diverges the behavior of its iterator; should be >=1 drop_last=False, pin_memory=True, ) diff --git a/source/tests/pt/test_saveload_se_e2_a.py b/source/tests/pt/test_saveload_se_e2_a.py index a8d5032a25..95d7f97a88 100644 --- a/source/tests/pt/test_saveload_se_e2_a.py +++ b/source/tests/pt/test_saveload_se_e2_a.py @@ -79,7 +79,7 @@ def setUp(self): self.dataset, sampler=torch.utils.data.RandomSampler(self.dataset), batch_size=None, - num_workers=1, # setting to 0 diverges the behavior of its iterator; should be >=1 + num_workers=0, # setting to 0 diverges the behavior of its iterator; should be >=1 drop_last=False, pin_memory=True, )