Skip to content

Commit

Permalink
update
Browse files — browse the repository at this point in the history
  • Loading branch information
awaelchli committed Jul 10, 2024
1 parent 499bb9d commit 62775f3
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/litdata/streaming/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def _replay_chunks_sampling(
indexes[worker_idx] -= size
chunks_index[worker_idx] += 1
else:
# We've reached the chunk where resuming needs to take place
# We've reached the chunk where resuming needs to take place (for this worker)
break

return chunks_index, indexes
4 changes: 2 additions & 2 deletions tests/streaming/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,7 @@ def _get_simulated_s3_dataloader(cache_dir, data_dir):
return StreamingDataLoader(dataset, batch_size=2, num_workers=1)


@pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="Not tested on windows and MacOs")
@pytest.mark.skipif(sys.platform == "win32", reason="Not tested on windows and MacOs")
@mock.patch.dict(os.environ, {}, clear=True)
def test_dataset_resume_on_future_chunks(tmpdir, monkeypatch):
"""This test is constructed to test resuming from a chunk past the first chunk, when subsequent chunks don't have
Expand All @@ -824,7 +824,7 @@ def test_dataset_resume_on_future_chunks(tmpdir, monkeypatch):
chunk_size=190,
num_workers=4,
)
assert len(os.listdir(tmpdir / "optimized")) > 1
assert len(os.listdir(tmpdir / "optimized")) == 9 # 8 chunks + 1 index file

os.mkdir(s3_cache_dir)
train_dataloader = _get_simulated_s3_dataloader(s3_cache_dir, data_dir)
Expand Down

0 comments on commit 62775f3

Please sign in to comment.