From 1368f6431e738f9351511b109cd784b843a05ec8 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Fri, 23 Feb 2024 17:19:51 +0100
Subject: [PATCH] rename `litdata` (#16)

---
 .github/workflows/ci-checks.yml | 2 +-
 .github/workflows/ci-testing.yml | 2 +-
 MANIFEST.in | 4 +--
 Makefile | 2 +-
 README.md | 26 +++++++++----------
 docs/source/conf.py | 10 +++----
 lightning_data/__init__.py | 20 --------------
 {lightning_data => litdata}/CHANGELOG.md | 0
 {lightning_data => litdata}/__about__.py | 0
 litdata/__init__.py | 20 ++++++++++++++
 {lightning_data => litdata}/constants.py | 0
 .../processing/__init__.py | 0
 .../processing/data_processor.py | 18 ++++++-------
 .../processing/functions.py | 10 +++----
 .../processing/readers.py | 0
 .../processing/utilities.py | 4 +--
 .../streaming/__init__.py | 10 +++----
 .../streaming/cache.py | 18 ++++++-------
 .../streaming/client.py | 2 +-
 .../streaming/combined.py | 4 +--
 .../streaming/compression.py | 0
 .../streaming/config.py | 10 +++----
 .../streaming/dataloader.py | 18 ++++++-------
 .../streaming/dataset.py | 16 ++++++------
 .../streaming/downloader.py | 4 +--
 .../streaming/item_loader.py | 4 +--
 .../streaming/reader.py | 12 ++++-----
 .../streaming/resolver.py | 0
 .../streaming/sampler.py | 0
 .../streaming/serializers.py | 2 +-
 .../streaming/shuffle.py | 6 ++---
 .../streaming/writer.py | 12 ++++-----
 .../utilities/__init__.py | 0
 .../utilities/broadcast.py | 0
 {lightning_data => litdata}/utilities/env.py | 0
 .../utilities/format.py | 0
 .../utilities/packing.py | 0
 .../utilities/shuffle.py | 2 +-
 pyproject.toml | 6 ++---
 setup.py | 8 +++---
 tests/processing/test_data_processor.py | 16 ++++++------
 tests/processing/test_functions.py | 4 +--
 tests/processing/test_readers.py | 4 +--
 tests/processing/test_utilities.py | 6 ++---
 tests/streaming/test_cache.py | 12 ++++-----
 tests/streaming/test_client.py | 2 +-
 tests/streaming/test_combined.py | 8 +++---
 tests/streaming/test_dataloader.py | 4 +--
 tests/streaming/test_dataset.py | 18 ++++++-------
 tests/streaming/test_downloader.py | 2 +-
 tests/streaming/test_reader.py | 14 +++++-----
 tests/streaming/test_resolver.py | 2 +-
 tests/streaming/test_sampler.py | 2 +-
 tests/streaming/test_serializer.py | 2 +-
 tests/streaming/test_writer.py | 10 +++---
 tests/utilities/test_broadcast.py | 2 +-
 tests/utilities/test_format.py | 2 +-
 tests/utilities/test_packing.py | 2 +-
 tests/utilities/test_shuffle.py | 4 +--
 59 files changed, 184 insertions(+), 184 deletions(-)
 delete mode 100644 lightning_data/__init__.py
 rename {lightning_data => litdata}/CHANGELOG.md (100%)
 rename {lightning_data => litdata}/__about__.py (100%)
 create mode 100644 litdata/__init__.py
 rename {lightning_data => litdata}/constants.py (100%)
 rename {lightning_data => litdata}/processing/__init__.py (100%)
 rename {lightning_data => litdata}/processing/data_processor.py (98%)
 rename {lightning_data => litdata}/processing/functions.py (97%)
 rename {lightning_data => litdata}/processing/readers.py (100%)
 rename {lightning_data => litdata}/processing/utilities.py (96%)
 rename {lightning_data => litdata}/streaming/__init__.py (69%)
 rename {lightning_data => litdata}/streaming/cache.py (91%)
 rename {lightning_data => litdata}/streaming/client.py (97%)
 rename {lightning_data => litdata}/streaming/combined.py (98%)
 rename {lightning_data => litdata}/streaming/compression.py (100%)
 rename {lightning_data => litdata}/streaming/config.py (94%)
 rename {lightning_data => litdata}/streaming/dataloader.py (97%)
 rename {lightning_data => litdata}/streaming/dataset.py (97%)
 rename {lightning_data => litdata}/streaming/downloader.py (97%)
 rename {lightning_data => litdata}/streaming/item_loader.py (98%)
 rename {lightning_data => litdata}/streaming/reader.py (96%)
 rename {lightning_data => litdata}/streaming/resolver.py (100%)
 rename {lightning_data => litdata}/streaming/sampler.py (100%)
 rename {lightning_data => litdata}/streaming/serializers.py (99%)
 rename {lightning_data => litdata}/streaming/shuffle.py (96%)
 rename {lightning_data => litdata}/streaming/writer.py (97%)
 rename {lightning_data => litdata}/utilities/__init__.py (100%)
 rename {lightning_data => litdata}/utilities/broadcast.py (100%)
 rename {lightning_data => litdata}/utilities/env.py (100%)
 rename {lightning_data => litdata}/utilities/format.py (100%)
 rename {lightning_data => litdata}/utilities/packing.py (100%)
 rename {lightning_data => litdata}/utilities/shuffle.py (98%)

diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml
index e518784e..9e9c8b53 100644
--- a/.github/workflows/ci-checks.yml
+++ b/.github/workflows/ci-checks.yml
@@ -30,7 +30,7 @@ jobs:
     uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.10.1
     with:
       actions-ref: v0.10.1
-      import-name: "lightning_data"
+      import-name: "litdata"
       artifact-name: dist-packages-${{ github.sha }}
       testing-matrix: |
         {
diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
index ef4bdff9..4228b9c6 100644
--- a/.github/workflows/ci-testing.yml
+++ b/.github/workflows/ci-testing.yml
@@ -69,7 +69,7 @@ jobs:
           pip list

       - name: Tests
-        run: coverage run --source lightning_data -m pytest tests -v
+        run: coverage run --source litdata -m pytest tests -v

       - name: Statistics
         if: success()
diff --git a/MANIFEST.in b/MANIFEST.in
index 20c087c6..1e0ea67f 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,10 +5,10 @@ recursive-exclude __pycache__ *.py[cod] *.orig

 # Include the README and CHANGELOG
 include *.md
-recursive-include lightning_data *.md
+recursive-include litdata *.md

 # Include the code
-recursive-include lightning_data *.py
+recursive-include litdata *.py

 # Include the license file
 include LICENSE
diff --git a/Makefile b/Makefile
index 377799ec..cf5250aa 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ test: clean
	pip install -q -r requirements/test.txt

	# use this to run tests
-	python -m coverage run --source lightning_data -m pytest src -v --flake8
+	python -m coverage run --source litdata -m pytest src -v --flake8
	python -m coverage report

docs: clean
diff --git a/README.md b/README.md
index 373df10c..be52960c 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,7 @@ Convert your raw dataset into Lightning Streaming format using the `optimize` op

 ```python
 import numpy as np
-from lightning_data import optimize
+from litdata import optimize
 from PIL import Image


@@ -123,7 +123,7 @@ Here is an example with [AWS S3](https://aws.amazon.com/s3).
 ### 3. Use StreamingDataset and DataLoader

 ```python
-from lightning_data import StreamingDataset
+from litdata import StreamingDataset
 from torch.utils.data import DataLoader

 # Remote path where full dataset is persistently stored
@@ -178,7 +178,7 @@ for i in range(1000):

 ```python
 import os
-from lightning_data import map
+from litdata import map
 from PIL import Image

 input_dir = "s3://my-bucket/my_images"
@@ -202,7 +202,7 @@ if __name__ == "__main__":
 To scale data processing, create a free account on [lightning.ai](https://lightning.ai/) platform. With the platform, the `optimize` and `map` can start multiple machines to make data processing drastically faster as follows:

 ```python
-from lightning_data import optimize, Machine
+from litdata import optimize, Machine

 optimize(
   ...
@@ -214,7 +214,7 @@ optimize(
 OR

 ```python
-from lightning_data import map, Machine
+from litdata import map, Machine

 map(
   ...
@@ -244,8 +244,8 @@ The `StreamingDataset` and `StreamingDataLoader` takes care of everything for yo
 You can easily experiment with dataset mixtures using the CombinedStreamingDataset.

 ```python
-from lightning_data import StreamingDataset, CombinedStreamingDataset
-from lightning_data.streaming.item_loader import TokensLoader
+from litdata import StreamingDataset, CombinedStreamingDataset
+from litdata.streaming.item_loader import TokensLoader
 from tqdm import tqdm
 import os
 from torch.utils.data import DataLoader
@@ -285,7 +285,7 @@ Note: The `StreamingDataLoader` is used by [Lit-GPT](https://github.com/Lightnin
 ```python
 import os
 import torch
-from lightning_data import StreamingDataset, StreamingDataLoader
+from litdata import StreamingDataset, StreamingDataLoader

 dataset = StreamingDataset("s3://my-bucket/my-data", shuffle=True)
 dataloader = StreamingDataLoader(dataset, num_workers=os.cpu_count(), batch_size=64)
@@ -308,7 +308,7 @@ for batch_idx, batch in enumerate(dataloader):
 The `StreamingDataLoader` supports profiling your data loading. Simply use the `profile_batches` argument as follows:

 ```python
-from lightning_data import StreamingDataset, StreamingDataLoader
+from litdata import StreamingDataset, StreamingDataLoader

 StreamingDataLoader(..., profile_batches=5)
 ```
@@ -320,7 +320,7 @@ This generates a Chrome trace called `result.json`. You can visualize this trace
 Access the data you need when you need it.

 ```python
-from lightning_data import StreamingDataset
+from litdata import StreamingDataset

 dataset = StreamingDataset(...)
 print(dataset[42]) # show the 42th element of the dataset
@@ -332,7 +332,7 @@ print(dataset[42]) # show the 42th element of the dataset
 ## ✢ Use data transforms

 ```python
-from lightning_data import StreamingDataset, StreamingDataLoader
+from litdata import StreamingDataset, StreamingDataLoader
 import torchvision.transforms.v2.functional as F

 class ImagenetStreamingDataset(StreamingDataset):
@@ -354,7 +354,7 @@ for batch in dataloader:
 Limit the size of the cache holding the chunks.

 ```python
-from lightning_data import StreamingDataset
+from litdata import StreamingDataset

 dataset = StreamingDataset(..., max_cache_size="10GB")
 ```
@@ -366,7 +366,7 @@ When processing large files like compressed [parquet files](https://en.wikipedia
 ```python
 from pathlib import Path
 import pyarrow.parquet as pq
-from lightning_data import optimize
+from litdata import optimize
 from tokenizer import Tokenizer
 from functools import partial

diff --git a/docs/source/conf.py b/docs/source/conf.py
index dc679337..46fd79f3 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -21,7 +21,7 @@
 SPHINX_MOCK_REQUIREMENTS = int(os.environ.get("SPHINX_MOCK_REQUIREMENTS", True))

 # alternative https://stackoverflow.com/a/67692/4521646
-spec = spec_from_file_location("lightning_data/__about__.py", os.path.join(_PATH_ROOT, "lightning_data", "__about__.py"))
+spec = spec_from_file_location("litdata/__about__.py", os.path.join(_PATH_ROOT, "litdata", "__about__.py"))
 about = module_from_spec(spec)
 spec.loader.exec_module(about)

@@ -316,8 +316,8 @@ def find_source():
         fname = inspect.getsourcefile(obj)
         # https://github.com/rtfd/readthedocs.org/issues/5735
         if any(s in fname for s in ("readthedocs", "rtfd", "checkouts")):
-            # /home/docs/checkouts/readthedocs.org/user_builds/lightning_data/checkouts/
-            # devel/lightning_data/utilities/cls_experiment.py#L26-L176
+            # /home/docs/checkouts/readthedocs.org/user_builds/litdata/checkouts/
+            # devel/litdata/utilities/cls_experiment.py#L26-L176
             path_top = os.path.abspath(os.path.join("..", "..", ".."))
             fname = os.path.relpath(fname, start=path_top)
         else:
@@ -380,8 +380,8 @@
 import os
 import torch

-import lightning_data
-from lightning_data import StreamingDataset
+import litdata
+from litdata import StreamingDataset
 """
 coverage_skip_undoc_in_source = True
diff --git a/lightning_data/__init__.py b/lightning_data/__init__.py
deleted file mode 100644
index 4507f70b..00000000
--- a/lightning_data/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from lightning_utilities.core.imports import RequirementCache
-
-from lightning_data.__about__ import *  # noqa: F403
-from lightning_data.processing.functions import map, optimize, walk
-from lightning_data.streaming.combined import CombinedStreamingDataset
-from lightning_data.streaming.dataloader import StreamingDataLoader
-from lightning_data.streaming.dataset import StreamingDataset
-
-__all__ = [
-    "StreamingDataset",
-    "CombinedStreamingDataset",
-    "StreamingDataLoader",
-    "map",
-    "optimize",
-    "walk",
-]
-if RequirementCache("lightning_sdk"):
-    from lightning_sdk import Machine  # noqa: F401
-
-    __all__ + ["Machine"]
diff --git a/lightning_data/CHANGELOG.md b/litdata/CHANGELOG.md
similarity index 100%
rename from lightning_data/CHANGELOG.md
rename to litdata/CHANGELOG.md
diff --git a/lightning_data/__about__.py b/litdata/__about__.py
similarity index 100%
rename from lightning_data/__about__.py
rename to litdata/__about__.py
diff --git a/litdata/__init__.py b/litdata/__init__.py
new file mode 100644
index 00000000..9188b49e
--- /dev/null
+++ b/litdata/__init__.py
@@ -0,0 +1,20 @@
+from lightning_utilities.core.imports import RequirementCache
+
+from litdata.__about__ import *  # noqa: F403
+from litdata.processing.functions import map, optimize, walk
+from litdata.streaming.combined import CombinedStreamingDataset
+from litdata.streaming.dataloader import StreamingDataLoader
+from litdata.streaming.dataset import StreamingDataset
+
+__all__ = [
+    "StreamingDataset",
+    "CombinedStreamingDataset",
+    "StreamingDataLoader",
+    "map",
+    "optimize",
+    "walk",
+]
+if RequirementCache("lightning_sdk"):
+    from lightning_sdk import Machine  # noqa: F401
+
+    __all__ + ["Machine"]
diff --git a/lightning_data/constants.py b/litdata/constants.py
similarity index 100%
rename from lightning_data/constants.py
rename to litdata/constants.py
diff --git a/lightning_data/processing/__init__.py b/litdata/processing/__init__.py
similarity index 100%
rename from lightning_data/processing/__init__.py
rename to litdata/processing/__init__.py
diff --git a/lightning_data/processing/data_processor.py b/litdata/processing/data_processor.py
similarity index 98%
rename from lightning_data/processing/data_processor.py
rename to litdata/processing/data_processor.py
index a9347902..e752b0a5 100644
--- a/lightning_data/processing/data_processor.py
+++ b/litdata/processing/data_processor.py
@@ -21,7 +21,7 @@
 import torch
 from tqdm.auto import tqdm as _tqdm

-from lightning_data.constants import (
+from litdata.constants import (
     _BOTO3_AVAILABLE,
     _DEFAULT_FAST_DEV_RUN_ITEMS,
     _INDEX_FILENAME,
@@ -29,14 +29,14 @@
     _LIGHTNING_CLOUD_LATEST,
     _TORCH_GREATER_EQUAL_2_1_0,
 )
-from lightning_data.processing.readers import BaseReader
-from lightning_data.processing.utilities import _create_dataset
-from lightning_data.streaming import Cache
-from lightning_data.streaming.cache import Dir
-from lightning_data.streaming.client import S3Client
-from lightning_data.streaming.resolver import _resolve_dir
-from lightning_data.utilities.broadcast import broadcast_object
-from lightning_data.utilities.packing import _pack_greedily
+from litdata.processing.readers import BaseReader
+from litdata.processing.utilities import _create_dataset
+from litdata.streaming import Cache
+from litdata.streaming.cache import Dir
+from litdata.streaming.client import S3Client
+from litdata.streaming.resolver import _resolve_dir
+from litdata.utilities.broadcast import broadcast_object
+from litdata.utilities.packing import _pack_greedily

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import tree_flatten, tree_unflatten, treespec_loads
diff --git a/lightning_data/processing/functions.py b/litdata/processing/functions.py
similarity index 97%
rename from lightning_data/processing/functions.py
rename to litdata/processing/functions.py
index 9b65eee3..8b0e35a1 100644
--- a/lightning_data/processing/functions.py
+++ b/litdata/processing/functions.py
@@ -22,11 +22,11 @@

 import torch

-from lightning_data.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
-from lightning_data.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
-from lightning_data.processing.readers import BaseReader
-from lightning_data.processing.utilities import optimize_dns_context
-from lightning_data.streaming.resolver import (
+from litdata.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
+from litdata.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
+from litdata.processing.readers import BaseReader
+from litdata.processing.utilities import optimize_dns_context
+from litdata.streaming.resolver import (
     Dir,
     _assert_dir_has_index_file,
     _assert_dir_is_empty,
diff --git a/lightning_data/processing/readers.py b/litdata/processing/readers.py
similarity index 100%
rename from lightning_data/processing/readers.py
rename to litdata/processing/readers.py
diff --git a/lightning_data/processing/utilities.py b/litdata/processing/utilities.py
similarity index 96%
rename from lightning_data/processing/utilities.py
rename to litdata/processing/utilities.py
index a049fd81..61acd3f8 100644
--- a/lightning_data/processing/utilities.py
+++ b/litdata/processing/utilities.py
@@ -5,7 +5,7 @@
 from subprocess import DEVNULL, Popen
 from typing import Any, Callable, List, Optional, Tuple, Union

-from lightning_data.constants import _IS_IN_STUDIO, _LIGHTNING_CLOUD_LATEST
+from litdata.constants import _IS_IN_STUDIO, _LIGHTNING_CLOUD_LATEST

 if _LIGHTNING_CLOUD_LATEST:
     from lightning_cloud.openapi import (
@@ -132,7 +132,7 @@ def optimize_dns(enable: bool) -> None:
     ):
         cmd = (
             f"sudo /home/zeus/miniconda3/envs/cloudspace/bin/python"
-            f" -c 'from lightning_data.processing.utilities import _optimize_dns; _optimize_dns({enable})'"
+            f" -c 'from litdata.processing.utilities import _optimize_dns; _optimize_dns({enable})'"
         )
         Popen(cmd, shell=True, stdout=DEVNULL, stderr=DEVNULL).wait()  # E501
diff --git a/lightning_data/streaming/__init__.py b/litdata/streaming/__init__.py
similarity index 69%
rename from lightning_data/streaming/__init__.py
rename to litdata/streaming/__init__.py
index 8c79bb98..d245c37b 100644
--- a/lightning_data/streaming/__init__.py
+++ b/litdata/streaming/__init__.py
@@ -11,11 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from lightning_data.streaming.cache import Cache
-from lightning_data.streaming.combined import CombinedStreamingDataset
-from lightning_data.streaming.dataloader import StreamingDataLoader
-from lightning_data.streaming.dataset import StreamingDataset
-from lightning_data.streaming.item_loader import TokensLoader
+from litdata.streaming.cache import Cache
+from litdata.streaming.combined import CombinedStreamingDataset
+from litdata.streaming.dataloader import StreamingDataLoader
+from litdata.streaming.dataset import StreamingDataset
+from litdata.streaming.item_loader import TokensLoader

 __all__ = [
     "Cache",
diff --git a/lightning_data/streaming/cache.py b/litdata/streaming/cache.py
similarity index 91%
rename from lightning_data/streaming/cache.py
rename to litdata/streaming/cache.py
index 105a9a4d..18d78e31 100644
--- a/lightning_data/streaming/cache.py
+++ b/litdata/streaming/cache.py
@@ -15,19 +15,19 @@
 import os
 from typing import Any, Dict, List, Optional, Tuple, Union

-from lightning_data.constants import (
+from litdata.constants import (
     _INDEX_FILENAME,
     _LIGHTNING_CLOUD_LATEST,
     _TORCH_GREATER_EQUAL_2_1_0,
 )
-from lightning_data.streaming.item_loader import BaseItemLoader
-from lightning_data.streaming.reader import BinaryReader
-from lightning_data.streaming.resolver import Dir, _resolve_dir
-from lightning_data.streaming.sampler import ChunkedIndex
-from lightning_data.streaming.serializers import Serializer
-from lightning_data.streaming.writer import BinaryWriter
-from lightning_data.utilities.env import _DistributedEnv, _WorkerEnv
-from lightning_data.utilities.format import _convert_bytes_to_int
+from litdata.streaming.item_loader import BaseItemLoader
+from litdata.streaming.reader import BinaryReader
+from litdata.streaming.resolver import Dir, _resolve_dir
+from litdata.streaming.sampler import ChunkedIndex
+from litdata.streaming.serializers import Serializer
+from litdata.streaming.writer import BinaryWriter
+from litdata.utilities.env import _DistributedEnv, _WorkerEnv
+from litdata.utilities.format import _convert_bytes_to_int

 logger = logging.Logger(__name__)
diff --git a/lightning_data/streaming/client.py b/litdata/streaming/client.py
similarity index 97%
rename from lightning_data/streaming/client.py
rename to litdata/streaming/client.py
index 6bf52986..0a12616d 100644
--- a/lightning_data/streaming/client.py
+++ b/litdata/streaming/client.py
@@ -2,7 +2,7 @@
 from time import time
 from typing import Any, Optional

-from lightning_data.constants import _BOTO3_AVAILABLE
+from litdata.constants import _BOTO3_AVAILABLE

 if _BOTO3_AVAILABLE:
     import boto3
diff --git a/lightning_data/streaming/combined.py b/litdata/streaming/combined.py
similarity index 98%
rename from lightning_data/streaming/combined.py
rename to litdata/streaming/combined.py
index 721e8398..3209c263 100644
--- a/lightning_data/streaming/combined.py
+++ b/litdata/streaming/combined.py
@@ -16,8 +16,8 @@

 from torch.utils.data import IterableDataset

-from lightning_data.streaming.dataset import StreamingDataset
-from lightning_data.utilities.env import _WorkerEnv
+from litdata.streaming.dataset import StreamingDataset
+from litdata.utilities.env import _WorkerEnv

 __NUM_SAMPLES_YIELDED_KEY__ = "__NUM_SAMPLES_YIELDED__"
 __SAMPLES_KEY__ = "__SAMPLES__"
diff --git a/lightning_data/streaming/compression.py b/litdata/streaming/compression.py
similarity index 100%
rename from lightning_data/streaming/compression.py
rename to litdata/streaming/compression.py
diff --git a/lightning_data/streaming/config.py b/litdata/streaming/config.py
similarity index 94%
rename from lightning_data/streaming/config.py
rename to litdata/streaming/config.py
index 4d011237..befb3019 100644
--- a/lightning_data/streaming/config.py
+++ b/litdata/streaming/config.py
@@ -15,11 +15,11 @@
 import os
 from typing import Any, Dict, List, Optional, Tuple

-from lightning_data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
-from lightning_data.streaming.downloader import get_downloader_cls
-from lightning_data.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader
-from lightning_data.streaming.sampler import ChunkedIndex
-from lightning_data.streaming.serializers import Serializer
+from litdata.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
+from litdata.streaming.downloader import get_downloader_cls
+from litdata.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader
+from litdata.streaming.sampler import ChunkedIndex
+from litdata.streaming.serializers import Serializer

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import tree_unflatten, treespec_loads
diff --git a/lightning_data/streaming/dataloader.py b/litdata/streaming/dataloader.py
similarity index 97%
rename from lightning_data/streaming/dataloader.py
rename to litdata/streaming/dataloader.py
index ab313c39..72c360d1 100644
--- a/lightning_data/streaming/dataloader.py
+++ b/litdata/streaming/dataloader.py
@@ -33,16 +33,16 @@
 )
 from torch.utils.data.sampler import BatchSampler, Sampler

-from lightning_data.constants import _DEFAULT_CHUNK_BYTES, _TORCH_GREATER_EQUAL_2_1_0, _VIZ_TRACKER_AVAILABLE
-from lightning_data.streaming import Cache
-from lightning_data.streaming.combined import (
+from litdata.constants import _DEFAULT_CHUNK_BYTES, _TORCH_GREATER_EQUAL_2_1_0, _VIZ_TRACKER_AVAILABLE
+from litdata.streaming import Cache
+from litdata.streaming.combined import (
     __NUM_SAMPLES_YIELDED_KEY__,
     __SAMPLES_KEY__,
     CombinedStreamingDataset,
 )
-from lightning_data.streaming.dataset import StreamingDataset
-from lightning_data.streaming.sampler import CacheBatchSampler
-from lightning_data.utilities.env import _DistributedEnv
+from litdata.streaming.dataset import StreamingDataset
+from litdata.streaming.sampler import CacheBatchSampler
+from litdata.utilities.env import _DistributedEnv

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import tree_flatten
@@ -105,7 +105,7 @@ def __getitem__(self, index: int) -> Any:
             if not _equal_items(data_1, data2):
                 raise ValueError(
                     f"Your dataset items aren't deterministic. Found {data_1} and {data2} for index {index}."
-                    " HINT: Use the `lightning_data.cache.Cache` directly within your dataset."
+                    " HINT: Use the `litdata.cache.Cache` directly within your dataset."
                 )
             self._is_deterministic = True
             self._cache[index] = data_1
@@ -180,7 +180,7 @@ def __call__(
     ) -> None:
         from torch.utils.data._utils import worker

-        from lightning_data.streaming.cache import Cache
+        from litdata.streaming.cache import Cache

         enable_profiling = self._global_rank == 0 and worker_id == 0 and _VIZ_TRACKER_AVAILABLE and self._profile

@@ -481,7 +481,7 @@ def _try_put_index(self) -> None:
 class StreamingDataLoader(DataLoader):
     r"""The StreamingDataLoader combines a dataset and a sampler, and provides an iterable over the given dataset.

-    The :class:`~lightning_data.streaming.dataloader.StreamingDataLoader` supports either a
+    The :class:`~litdata.streaming.dataloader.StreamingDataLoader` supports either a
     StreamingDataset and CombinedStreamingDataset datasets with single- or multi-process loading, customizing
     loading order and optional automatic batching (collation) and memory pinning.
diff --git a/lightning_data/streaming/dataset.py b/litdata/streaming/dataset.py
similarity index 97%
rename from lightning_data/streaming/dataset.py
rename to litdata/streaming/dataset.py
index 4f9dcfc5..08744603 100644
--- a/lightning_data/streaming/dataset.py
+++ b/litdata/streaming/dataset.py
@@ -20,17 +20,17 @@
 import numpy as np
 from torch.utils.data import IterableDataset

-from lightning_data.constants import (
+from litdata.constants import (
     _DEFAULT_CACHE_DIR,
     _INDEX_FILENAME,
 )
-from lightning_data.streaming import Cache
-from lightning_data.streaming.item_loader import BaseItemLoader
-from lightning_data.streaming.resolver import Dir, _resolve_dir
-from lightning_data.streaming.sampler import ChunkedIndex
-from lightning_data.streaming.serializers import Serializer
-from lightning_data.streaming.shuffle import FullShuffle, NoShuffle, Shuffle
-from lightning_data.utilities.env import _DistributedEnv, _is_in_dataloader_worker, _WorkerEnv
+from litdata.streaming import Cache
+from litdata.streaming.item_loader import BaseItemLoader
+from litdata.streaming.resolver import Dir, _resolve_dir
+from litdata.streaming.sampler import ChunkedIndex
+from litdata.streaming.serializers import Serializer
+from litdata.streaming.shuffle import FullShuffle, NoShuffle, Shuffle
+from litdata.utilities.env import _DistributedEnv, _is_in_dataloader_worker, _WorkerEnv

 logger = Logger(__name__)
diff --git a/lightning_data/streaming/downloader.py b/litdata/streaming/downloader.py
similarity index 97%
rename from lightning_data/streaming/downloader.py
rename to litdata/streaming/downloader.py
index 288fc51a..ea3a3571 100644
--- a/lightning_data/streaming/downloader.py
+++ b/litdata/streaming/downloader.py
@@ -19,8 +19,8 @@

 from filelock import FileLock, Timeout

-from lightning_data.constants import _INDEX_FILENAME
-from lightning_data.streaming.client import S3Client
+from litdata.constants import _INDEX_FILENAME
+from litdata.streaming.client import S3Client


 class Downloader(ABC):
diff --git a/lightning_data/streaming/item_loader.py b/litdata/streaming/item_loader.py
similarity index 98%
rename from lightning_data/streaming/item_loader.py
rename to litdata/streaming/item_loader.py
index 7a3f694d..b578e3bf 100644
--- a/lightning_data/streaming/item_loader.py
+++ b/litdata/streaming/item_loader.py
@@ -19,11 +19,11 @@
 import numpy as np
 import torch

-from lightning_data.constants import (
+from litdata.constants import (
     _TORCH_DTYPES_MAPPING,
     _TORCH_GREATER_EQUAL_2_1_0,
 )
-from lightning_data.streaming.serializers import Serializer
+from litdata.streaming.serializers import Serializer

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import PyTree, tree_unflatten
diff --git a/lightning_data/streaming/reader.py b/litdata/streaming/reader.py
similarity index 96%
rename from lightning_data/streaming/reader.py
rename to litdata/streaming/reader.py
index 5a2fe760..ad63175c 100644
--- a/lightning_data/streaming/reader.py
+++ b/litdata/streaming/reader.py
@@ -20,12 +20,12 @@
 from threading import Thread
 from typing import Any, Dict, List, Optional, Tuple, Union

-from lightning_data.constants import _TORCH_GREATER_EQUAL_2_1_0
-from lightning_data.streaming.config import ChunksConfig
-from lightning_data.streaming.item_loader import BaseItemLoader, PyTreeLoader
-from lightning_data.streaming.sampler import ChunkedIndex
-from lightning_data.streaming.serializers import Serializer, _get_serializers
-from lightning_data.utilities.env import _DistributedEnv, _WorkerEnv
+from litdata.constants import _TORCH_GREATER_EQUAL_2_1_0
+from litdata.streaming.config import ChunksConfig
+from litdata.streaming.item_loader import BaseItemLoader, PyTreeLoader
+from litdata.streaming.sampler import ChunkedIndex
+from litdata.streaming.serializers import Serializer, _get_serializers
+from litdata.utilities.env import _DistributedEnv, _WorkerEnv

 warnings.filterwarnings("ignore", message=".*The given buffer is not writable.*")
diff --git a/lightning_data/streaming/resolver.py b/litdata/streaming/resolver.py
similarity index 100%
rename from lightning_data/streaming/resolver.py
rename to litdata/streaming/resolver.py
diff --git a/lightning_data/streaming/sampler.py b/litdata/streaming/sampler.py
similarity index 100%
rename from lightning_data/streaming/sampler.py
rename to litdata/streaming/sampler.py
diff --git a/lightning_data/streaming/serializers.py b/litdata/streaming/serializers.py
similarity index 99%
rename from lightning_data/streaming/serializers.py
rename to litdata/streaming/serializers.py
index 2170fba5..700251d8 100644
--- a/lightning_data/streaming/serializers.py
+++ b/litdata/streaming/serializers.py
@@ -23,7 +23,7 @@
 import torch
 from lightning_utilities.core.imports import RequirementCache

-from lightning_data.constants import _NUMPY_DTYPES_MAPPING, _TORCH_DTYPES_MAPPING
+from litdata.constants import _NUMPY_DTYPES_MAPPING, _TORCH_DTYPES_MAPPING

 _PIL_AVAILABLE = RequirementCache("PIL")
 _TORCH_VISION_AVAILABLE = RequirementCache("torchvision")
diff --git a/lightning_data/streaming/shuffle.py b/litdata/streaming/shuffle.py
similarity index 96%
rename from lightning_data/streaming/shuffle.py
rename to litdata/streaming/shuffle.py
index cbe2500e..c8013ef5 100644
--- a/lightning_data/streaming/shuffle.py
+++ b/litdata/streaming/shuffle.py
@@ -17,9 +17,9 @@

 import numpy as np

-from lightning_data.streaming import Cache
-from lightning_data.utilities.env import _DistributedEnv
-from lightning_data.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle
+from litdata.streaming import Cache
+from litdata.utilities.env import _DistributedEnv
+from litdata.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle


 class Shuffle(ABC):
diff --git a/lightning_data/streaming/writer.py b/litdata/streaming/writer.py
similarity index 97%
rename from lightning_data/streaming/writer.py
rename to litdata/streaming/writer.py
index 970fcbc7..7586ed03 100644
--- a/lightning_data/streaming/writer.py
+++ b/litdata/streaming/writer.py
@@ -21,12 +21,12 @@
 import numpy as np
 import torch

-from lightning_data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
-from lightning_data.processing.utilities import get_worker_rank
-from lightning_data.streaming.compression import _COMPRESSORS, Compressor
-from lightning_data.streaming.serializers import Serializer, _get_serializers
-from lightning_data.utilities.env import _DistributedEnv, _WorkerEnv
-from lightning_data.utilities.format import _convert_bytes_to_int, _human_readable_bytes
+from litdata.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
+from litdata.processing.utilities import get_worker_rank
+from litdata.streaming.compression import _COMPRESSORS, Compressor
+from litdata.streaming.serializers import Serializer, _get_serializers
+from litdata.utilities.env import _DistributedEnv, _WorkerEnv
+from litdata.utilities.format import _convert_bytes_to_int, _human_readable_bytes

 if _TORCH_GREATER_EQUAL_2_1_0:
     from torch.utils._pytree import PyTree, tree_flatten, treespec_dumps
diff --git a/lightning_data/utilities/__init__.py b/litdata/utilities/__init__.py
similarity index 100%
rename from lightning_data/utilities/__init__.py
rename to litdata/utilities/__init__.py
diff --git a/lightning_data/utilities/broadcast.py b/litdata/utilities/broadcast.py
similarity index 100%
rename from lightning_data/utilities/broadcast.py
rename to litdata/utilities/broadcast.py
diff --git a/lightning_data/utilities/env.py b/litdata/utilities/env.py
similarity index 100%
rename from lightning_data/utilities/env.py
rename to litdata/utilities/env.py
diff --git a/lightning_data/utilities/format.py b/litdata/utilities/format.py
similarity index 100%
rename from lightning_data/utilities/format.py
rename to litdata/utilities/format.py
diff --git a/lightning_data/utilities/packing.py b/litdata/utilities/packing.py
similarity index 100%
rename from lightning_data/utilities/packing.py
rename to litdata/utilities/packing.py
diff --git a/lightning_data/utilities/shuffle.py b/litdata/utilities/shuffle.py
similarity index 98%
rename from lightning_data/utilities/shuffle.py
rename to litdata/utilities/shuffle.py
index 430ac21e..8b24a6de 100644
--- a/lightning_data/utilities/shuffle.py
+++ b/litdata/utilities/shuffle.py
@@ -2,7 +2,7 @@

 import numpy as np

-from lightning_data.utilities.env import _DistributedEnv
+from litdata.utilities.env import _DistributedEnv


 def _intra_node_chunk_shuffle(
diff --git a/pyproject.toml b/pyproject.toml
index 78755b71..38238681 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@
 # limitations under the License.

 [metadata]
-name = "lightning_data"
+name = "litdata"
 author = "Lightning-AI et al."
 url = "https://github.com/Lightning-AI/lit-data"

@@ -87,7 +87,7 @@ lint.ignore-init-module-imports = true
     "S501", # Probable use of `requests` call with `verify=False` disabling SSL certificate checks
     "S108", # Probable insecure usage of temporary file or directory: "/tmp/data/MNIST"
 ]
-"lightning_data/**" = [
+"litdata/**" = [
     "S101", # todo: Use of `assert` detected
     "S105", "S106", "S107", # todo: Possible hardcoded password: ...
     "S113", # todo: Probable use of requests call without timeout
@@ -131,7 +131,7 @@ max-complexity = 10

 [tool.mypy]
 files = [
-    "lightning_data",
+    "litdata",
 ]
 # This section is for folders with "-" as they are not valid python modules
 exclude = [
diff --git a/setup.py b/setup.py
index 9eb41dee..7a0c51e2 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 _PATH_REQUIRES = os.path.join(_PATH_ROOT, "requirements")


-def _load_py_module(fname, pkg="lightning_data"):
+def _load_py_module(fname, pkg="litdata"):
     spec = spec_from_file_location(os.path.join(pkg, fname), os.path.join(_PATH_ROOT, pkg, fname))
     py = module_from_spec(spec)
     spec.loader.exec_module(py)
@@ -51,7 +51,7 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
 # the goal of the project is simplicity for researchers, don't want to add too much
 # engineer specific practices
 setup(
-    name="lightning-data",
+    name="litdata",
     version=about.__version__,
     description=about.__docs__,
     author=about.__author__,
@@ -59,8 +59,8 @@ def _prepare_extras(requirements_dir: str = _PATH_REQUIRES, skip_files: tuple =
     url=about.__homepage__,
     download_url="https://github.com/Lightning-AI/lit-data",
     license=about.__license__,
-    packages=find_packages(where="lightning_data"),
-    package_dir={"": "lightning_data"},
+    packages=find_packages(where="litdata"),
+    package_dir={"": "litdata"},
     long_description=readme,
     long_description_content_type="text/markdown",
     include_package_data=True,
diff --git a/tests/processing/test_data_processor.py b/tests/processing/test_data_processor.py
index 6fb500b0..950f70c1 100644
--- a/tests/processing/test_data_processor.py
+++ b/tests/processing/test_data_processor.py
@@ -12,9 +12,9 @@
 from lightning import seed_everything
 from lightning_utilities.core.imports import RequirementCache

-from lightning_data.processing import data_processor as data_processor_module
-from lightning_data.processing import functions
-from lightning_data.processing.data_processor import (
+from litdata.processing import data_processor as data_processor_module
+from litdata.processing import functions
+from litdata.processing.data_processor import (
     DataChunkRecipe,
     DataProcessor,
     DataTransformRecipe,
@@ -27,9 +27,9 @@
     _wait_for_disk_usage_higher_than_threshold,
     _wait_for_file_to_exist,
 )
-from lightning_data.processing.functions import LambdaDataTransformRecipe, map, optimize
-from lightning_data.streaming import resolver
-from lightning_data.streaming.cache import Cache, Dir
+from litdata.processing.functions import LambdaDataTransformRecipe, map, optimize
+from litdata.streaming import resolver
+from litdata.streaming.cache import Cache, Dir

 _PIL_AVAILABLE = RequirementCache("PIL")

@@ -164,7 +164,7 @@ def fn(*_, **__):


 @pytest.mark.skipif(condition=sys.platform == "win32", reason="Not supported on windows")
-@mock.patch("lightning_data.processing.data_processor._wait_for_disk_usage_higher_than_threshold")
+@mock.patch("litdata.processing.data_processor._wait_for_disk_usage_higher_than_threshold")
 def test_download_data_target(wait_for_disk_usage_higher_than_threshold_mock, tmpdir):
     input_dir = os.path.join(tmpdir, "input_dir")
     os.makedirs(input_dir, exist_ok=True)
@@ -203,7 +203,7 @@ def fn(*_, **__):
 def test_wait_for_disk_usage_higher_than_threshold():
     disk_usage_mock = mock.Mock(side_effect=[mock.Mock(free=10e9), mock.Mock(free=10e9), mock.Mock(free=10e11)])
-    with mock.patch("lightning_data.processing.data_processor.shutil.disk_usage", disk_usage_mock):
+    with mock.patch("litdata.processing.data_processor.shutil.disk_usage", disk_usage_mock):
         _wait_for_disk_usage_higher_than_threshold("/", 10, sleep_time=0)

     assert disk_usage_mock.call_count == 3
diff --git a/tests/processing/test_functions.py b/tests/processing/test_functions.py
index 7847b96a..a939c0e9 100644
--- a/tests/processing/test_functions.py
+++ b/tests/processing/test_functions.py
@@ -4,8 +4,8 @@

 import pytest

-from lightning_data import walk
-from lightning_data.processing.functions import _get_input_dir
+from litdata import walk
+from litdata.processing.functions import _get_input_dir


 @pytest.mark.skipif(sys.platform == "win32", reason="currently not supported for windows.")
diff --git a/tests/processing/test_readers.py b/tests/processing/test_readers.py
index fdf9396f..ff4b33ee 100644
--- a/tests/processing/test_readers.py
+++ b/tests/processing/test_readers.py
@@ -3,8 +3,8 @@

 import pytest

-from lightning_data import map
-from lightning_data.processing.readers import _PYARROW_AVAILABLE, BaseReader, ParquetReader
+from litdata import map
+from litdata.processing.readers import _PYARROW_AVAILABLE, BaseReader, ParquetReader


 class DummyReader(BaseReader):
diff --git a/tests/processing/test_utilities.py b/tests/processing/test_utilities.py
index 596b80b8..436e6063 100644
--- a/tests/processing/test_utilities.py
+++ b/tests/processing/test_utilities.py
@@ -1,7 +1,7 @@
 from unittest.mock import MagicMock

-from lightning_data.processing import utilities as utilities_module
-from lightning_data.processing.utilities import optimize_dns_context
+from litdata.processing import utilities as utilities_module
+from litdata.processing.utilities import optimize_dns_context


 def test_optimize_dns_context(monkeypatch):
@@ -31,6 +31,6 @@ def readlines(self):
     cmd = popen_mock._mock_call_args_list[0].args[0]
     expected_cmd = (
         "sudo /home/zeus/miniconda3/envs/cloudspace/bin/python"
-        " -c 'from lightning_data.processing.utilities import _optimize_dns; _optimize_dns(True)'"
+        " -c 'from litdata.processing.utilities import _optimize_dns; _optimize_dns(True)'"
     )
     assert cmd == expected_cmd
diff --git a/tests/streaming/test_cache.py b/tests/streaming/test_cache.py
index b2326353..c08c0a78 100644
--- a/tests/streaming/test_cache.py
+++ b/tests/streaming/test_cache.py
@@ -25,12 +25,12 @@
 from lightning_utilities.test.warning import no_warning_call
 from torch.utils.data import Dataset

-from lightning_data.streaming import Cache
-from lightning_data.streaming.dataloader import CacheDataLoader
-from lightning_data.streaming.dataset import StreamingDataset
-from lightning_data.streaming.item_loader import TokensLoader
-from lightning_data.streaming.serializers import Serializer
-from lightning_data.utilities.env import _DistributedEnv
+from litdata.streaming import Cache
+from litdata.streaming.dataloader import CacheDataLoader
+from litdata.streaming.dataset import StreamingDataset
+from litdata.streaming.item_loader import TokensLoader
+from litdata.streaming.serializers import Serializer
+from litdata.utilities.env import _DistributedEnv

 _PIL_AVAILABLE = RequirementCache("PIL")
 _TORCH_VISION_AVAILABLE = RequirementCache("torchvision")
diff --git a/tests/streaming/test_client.py b/tests/streaming/test_client.py
index 260f1b92..ca933604 100644
--- a/tests/streaming/test_client.py
+++ b/tests/streaming/test_client.py
@@ -4,7 +4,7 @@

 import pytest

-from lightning_data.streaming import client
+from litdata.streaming import client


 def test_s3_client_without_cloud_space_id(monkeypatch):
diff --git a/tests/streaming/test_combined.py b/tests/streaming/test_combined.py
index 029c6b94..40db3bf6 100644
--- a/tests/streaming/test_combined.py
+++ b/tests/streaming/test_combined.py
@@ -7,10 +7,10 @@
 from torch.utils.data import IterableDataset
 from torch.utils.data.dataloader import DataLoader

-from lightning_data.streaming.cache import Cache
-from lightning_data.streaming.combined import CombinedStreamingDataset
-from lightning_data.streaming.dataloader import StreamingDataLoader
-from lightning_data.streaming.dataset import Dir, StreamingDataset
+from litdata.streaming.cache import Cache
+from litdata.streaming.combined import CombinedStreamingDataset
+from litdata.streaming.dataloader import StreamingDataLoader
+from litdata.streaming.dataset import Dir, StreamingDataset


 class TestCombinedStreamingDataset(CombinedStreamingDataset):
diff --git a/tests/streaming/test_dataloader.py b/tests/streaming/test_dataloader.py
index b0aed53f..e5bbd17e 100644
--- a/tests/streaming/test_dataloader.py
+++ b/tests/streaming/test_dataloader.py
@@ -4,8 +4,8 @@
 import torch
 from torch import tensor

-from lightning_data.streaming import CombinedStreamingDataset, StreamingDataLoader
-from lightning_data.streaming import dataloader as streaming_dataloader_module
+from litdata.streaming import CombinedStreamingDataset, StreamingDataLoader
+from litdata.streaming import dataloader as streaming_dataloader_module


 class TestStatefulDataset:
diff --git a/tests/streaming/test_dataset.py b/tests/streaming/test_dataset.py
index a7f9607b..b8d2bda0 100644
--- a/tests/streaming/test_dataset.py
+++ b/tests/streaming/test_dataset.py
@@ -22,11 +22,11 @@
 from lightning import seed_everything
 from torch.utils.data import DataLoader

-from lightning_data.processing import functions
-from lightning_data.streaming import Cache
-from lightning_data.streaming import dataset as dataset_module
-from lightning_data.streaming.dataloader import StreamingDataLoader
-from lightning_data.streaming.dataset import (
+from litdata.processing import functions
+from litdata.streaming import Cache
+from litdata.streaming import dataset as dataset_module
+from litdata.streaming.dataloader import StreamingDataLoader
+from litdata.streaming.dataset import (
     _INDEX_FILENAME,
     Dir,
     StreamingDataset,
@@ -36,9 +36,9 @@
     _should_replace_path,
     _try_create_cache_dir,
 )
-from lightning_data.streaming.item_loader import TokensLoader
-from lightning_data.streaming.shuffle import FullShuffle, NoShuffle
-from lightning_data.utilities.env import _DistributedEnv, _WorkerEnv
+from litdata.streaming.item_loader import TokensLoader
+from litdata.streaming.shuffle import FullShuffle, NoShuffle
+from litdata.utilities.env import _DistributedEnv, _WorkerEnv


 def test_streaming_dataset(tmpdir, monkeypatch):
@@ -392,7 +392,7 @@ def test_try_create_cache_dir():
     # the cache dir creating at /cache requires root privileges, so we need to mock `os.makedirs()`
     with (
         mock.patch.dict("os.environ", {"LIGHTNING_CLUSTER_ID": "abc", "LIGHTNING_CLOUD_PROJECT_ID": "123"}),
-        mock.patch("lightning_data.streaming.dataset.os.makedirs") as makedirs_mock,
+        mock.patch("litdata.streaming.dataset.os.makedirs") as makedirs_mock,
     ):
         cache_dir_1 = _try_create_cache_dir("")
         cache_dir_2 = _try_create_cache_dir("ssdf")
diff --git a/tests/streaming/test_downloader.py b/tests/streaming/test_downloader.py
index 4f26bfbf..218d1bda 100644
--- a/tests/streaming/test_downloader.py
+++ b/tests/streaming/test_downloader.py
@@ -1,7 +1,7 @@
 import os
 from unittest.mock import MagicMock

-from lightning_data.streaming.downloader import S3Downloader, subprocess
+from litdata.streaming.downloader import S3Downloader, subprocess


 def test_s3_downloader_fast(tmpdir, monkeypatch):
diff --git a/tests/streaming/test_reader.py b/tests/streaming/test_reader.py
index 8ab18ff9..8fb4d542 100644
--- a/tests/streaming/test_reader.py
+++ b/tests/streaming/test_reader.py
@@ -4,13 +4,13 @@

 import numpy as np

-from lightning_data.streaming import reader
-from lightning_data.streaming.cache import Cache
-from lightning_data.streaming.config import ChunkedIndex
-from lightning_data.streaming.item_loader import PyTreeLoader
-from lightning_data.streaming.reader import _END_TOKEN, PrepareChunksThread, _get_folder_size
-from lightning_data.streaming.resolver import Dir
-from lightning_data.utilities.env import _DistributedEnv
+from litdata.streaming import reader
+from litdata.streaming.cache import Cache
+from litdata.streaming.config import ChunkedIndex
+from litdata.streaming.item_loader import PyTreeLoader
+from litdata.streaming.reader import _END_TOKEN, PrepareChunksThread, _get_folder_size
+from litdata.streaming.resolver import Dir
+from litdata.utilities.env import _DistributedEnv


 def test_reader_chunk_removal(tmpdir):
diff --git a/tests/streaming/test_resolver.py b/tests/streaming/test_resolver.py
index 56b1e872..b4c7e177 100644
--- a/tests/streaming/test_resolver.py
+++ b/tests/streaming/test_resolver.py
@@ -16,7 +16,7 @@
     V1ListDataConnectionsResponse,
 )

-from lightning_data.streaming import resolver
+from litdata.streaming import resolver


 @pytest.mark.skipif(sys.platform == "win32", reason="windows isn't supported")
diff --git a/tests/streaming/test_sampler.py b/tests/streaming/test_sampler.py
index a78baf04..1879771b 100644
--- a/tests/streaming/test_sampler.py
+++ b/tests/streaming/test_sampler.py
@@ -3,7 +3,7 @@
 import pytest
 from lightning import seed_everything

-from lightning_data.streaming.sampler import CacheBatchSampler
+from litdata.streaming.sampler import CacheBatchSampler


 @pytest.mark.parametrize(
diff --git a/tests/streaming/test_serializer.py b/tests/streaming/test_serializer.py
index 54db39b9..bd5fb002 100644
--- a/tests/streaming/test_serializer.py
+++ b/tests/streaming/test_serializer.py
@@ -22,7 +22,7 @@
 from lightning import seed_everything
 from lightning_utilities.core.imports import RequirementCache

-from lightning_data.streaming.serializers import (
+from litdata.streaming.serializers import (
     _AV_AVAILABLE,
     _NUMPY_DTYPES_MAPPING,
     _SERIALIZERS,
diff --git a/tests/streaming/test_writer.py b/tests/streaming/test_writer.py
index 49bdc6c5..4a25d678 100644
--- a/tests/streaming/test_writer.py
+++ b/tests/streaming/test_writer.py
@@ -20,11 +20,11 @@
 from lightning import seed_everything
 from lightning_utilities.core.imports import RequirementCache

-from lightning_data.streaming.compression import _ZSTD_AVAILABLE
-from lightning_data.streaming.reader import BinaryReader
-from lightning_data.streaming.sampler import ChunkedIndex
-from lightning_data.streaming.writer import BinaryWriter
-from lightning_data.utilities.format import _FORMAT_TO_RATIO
+from litdata.streaming.compression import _ZSTD_AVAILABLE
+from litdata.streaming.reader import BinaryReader
+from litdata.streaming.sampler import ChunkedIndex
+from litdata.streaming.writer import BinaryWriter
+from litdata.utilities.format import _FORMAT_TO_RATIO

 _PIL_AVAILABLE = RequirementCache("PIL")
diff --git a/tests/utilities/test_broadcast.py b/tests/utilities/test_broadcast.py
index f6511946..175c638b 100644
--- a/tests/utilities/test_broadcast.py
+++ b/tests/utilities/test_broadcast.py
@@ -1,7 +1,7 @@
 import os
 from unittest import mock

-from lightning_data.utilities.broadcast import broadcast_object, requests
+from litdata.utilities.broadcast import broadcast_object, requests


 @mock.patch.dict(
diff --git a/tests/utilities/test_format.py b/tests/utilities/test_format.py
index e8dcd592..91c48a40 100644
--- a/tests/utilities/test_format.py
+++ b/tests/utilities/test_format.py
@@ -1,4 +1,4 @@
-from lightning_data.utilities.format import _human_readable_bytes
+from litdata.utilities.format import _human_readable_bytes


 def test_human_readable_bytes():
diff --git a/tests/utilities/test_packing.py b/tests/utilities/test_packing.py
index 5fddcc25..c9a7c133 100644
--- a/tests/utilities/test_packing.py
+++ b/tests/utilities/test_packing.py
@@ -1,6 +1,6 @@
 import pytest

-from lightning_data.utilities.packing import _pack_greedily
+from litdata.utilities.packing import _pack_greedily


 def test_pack_greedily():
diff --git a/tests/utilities/test_shuffle.py b/tests/utilities/test_shuffle.py
index db6084a1..6923d82b 100644
--- a/tests/utilities/test_shuffle.py
+++ b/tests/utilities/test_shuffle.py
@@ -1,5 +1,5 @@
-from lightning_data.utilities.env import _DistributedEnv
-from lightning_data.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle
+from litdata.utilities.env import _DistributedEnv
+from litdata.utilities.shuffle import _associate_chunks_and_internals_to_ranks, _intra_node_chunk_shuffle


 def test_intra_node_chunk_shuffle():
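
For downstream code, this rename is import-level only: the new `litdata/__init__.py` above re-exports the same public names (`StreamingDataset`, `CombinedStreamingDataset`, `StreamingDataLoader`, `map`, `optimize`, `walk`) that `lightning_data` did. A minimal forward-compatible import shim, given only those re-exports shown in the patch, might look like this illustrative sketch (not part of the patch itself):

```python
# Illustrative sketch, not part of the patch: prefer the new `litdata`
# package name, falling back to the pre-rename `lightning_data` package.
try:
    from litdata import StreamingDataLoader, StreamingDataset
except ImportError:  # pre-rename releases still ship `lightning_data`
    from lightning_data import StreamingDataLoader, StreamingDataset

# Usage is unchanged by the rename (mirrors the README examples above).
dataset = StreamingDataset("s3://my-bucket/my-data", shuffle=True)
dataloader = StreamingDataLoader(dataset, batch_size=64)
```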