Skip to content

Commit

Permalink
rename litdata (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
Borda authored Feb 23, 2024
1 parent a2c7847 commit 1368f64
Show file tree
Hide file tree
Showing 59 changed files with 184 additions and 184 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.10.1  <!-- NOTE(review): original ref was mangled by an email-protection scraper ("[email protected]"); reconstructed from the adjacent `actions-ref: v0.10.1` — confirm against the repo -->
with:
actions-ref: v0.10.1
import-name: "lightning_data"
import-name: "litdata"
artifact-name: dist-packages-${{ github.sha }}
testing-matrix: |
{
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
pip list
- name: Tests
run: coverage run --source lightning_data -m pytest tests -v
run: coverage run --source litdata -m pytest tests -v

- name: Statistics
if: success()
Expand Down
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ recursive-exclude __pycache__ *.py[cod] *.orig

# Include the README and CHANGELOG
include *.md
recursive-include lightning_data *.md
recursive-include litdata *.md

# Include the code
recursive-include lightning_data *.py
recursive-include litdata *.py

# Include the license file
include LICENSE
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ test: clean
pip install -q -r requirements/test.txt

# use this to run tests
python -m coverage run --source lightning_data -m pytest src -v --flake8
python -m coverage run --source litdata -m pytest src -v --flake8
python -m coverage report

docs: clean
Expand Down
26 changes: 13 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Convert your raw dataset into Lightning Streaming format using the `optimize` op

```python
import numpy as np
from lightning_data import optimize
from litdata import optimize
from PIL import Image


Expand Down Expand Up @@ -123,7 +123,7 @@ Here is an example with [AWS S3](https://aws.amazon.com/s3).
### 3. Use StreamingDataset and DataLoader

```python
from lightning_data import StreamingDataset
from litdata import StreamingDataset
from torch.utils.data import DataLoader

# Remote path where full dataset is persistently stored
Expand Down Expand Up @@ -178,7 +178,7 @@ for i in range(1000):

```python
import os
from lightning_data import map
from litdata import map
from PIL import Image

input_dir = "s3://my-bucket/my_images"
Expand All @@ -202,7 +202,7 @@ if __name__ == "__main__":
To scale data processing, create a free account on [lightning.ai](https://lightning.ai/) platform. With the platform, the `optimize` and `map` can start multiple machines to make data processing drastically faster as follows:

```python
from lightning_data import optimize, Machine
from litdata import optimize, Machine

optimize(
...
Expand All @@ -214,7 +214,7 @@ optimize(
OR

```python
from lightning_data import map, Machine
from litdata import map, Machine

map(
...
Expand Down Expand Up @@ -244,8 +244,8 @@ The `StreamingDataset` and `StreamingDataLoader` takes care of everything for yo
You can easily experiment with dataset mixtures using the CombinedStreamingDataset.

```python
from lightning_data import StreamingDataset, CombinedStreamingDataset
from lightning_data.streaming.item_loader import TokensLoader
from litdata import StreamingDataset, CombinedStreamingDataset
from litdata.streaming.item_loader import TokensLoader
from tqdm import tqdm
import os
from torch.utils.data import DataLoader
Expand Down Expand Up @@ -285,7 +285,7 @@ Note: The `StreamingDataLoader` is used by [Lit-GPT](https://github.com/Lightnin
```python
import os
import torch
from lightning_data import StreamingDataset, StreamingDataLoader
from litdata import StreamingDataset, StreamingDataLoader

dataset = StreamingDataset("s3://my-bucket/my-data", shuffle=True)
dataloader = StreamingDataLoader(dataset, num_workers=os.cpu_count(), batch_size=64)
Expand All @@ -308,7 +308,7 @@ for batch_idx, batch in enumerate(dataloader):
The `StreamingDataLoader` supports profiling your data loading. Simply use the `profile_batches` argument as follows:

```python
from lightning_data import StreamingDataset, StreamingDataLoader
from litdata import StreamingDataset, StreamingDataLoader

StreamingDataLoader(..., profile_batches=5)
```
Expand All @@ -320,7 +320,7 @@ This generates a Chrome trace called `result.json`. You can visualize this trace
Access the data you need when you need it.

```python
from lightning_data import StreamingDataset
from litdata import StreamingDataset

dataset = StreamingDataset(...)

Expand All @@ -332,7 +332,7 @@ print(dataset[42]) # show the 42th element of the dataset
## ✢ Use data transforms

```python
from lightning_data import StreamingDataset, StreamingDataLoader
from litdata import StreamingDataset, StreamingDataLoader
import torchvision.transforms.v2.functional as F

class ImagenetStreamingDataset(StreamingDataset):
Expand All @@ -354,7 +354,7 @@ for batch in dataloader:
Limit the size of the cache holding the chunks.

```python
from lightning_data import StreamingDataset
from litdata import StreamingDataset

dataset = StreamingDataset(..., max_cache_size="10GB")
```
Expand All @@ -366,7 +366,7 @@ When processing large files like compressed [parquet files](https://en.wikipedia
```python
from pathlib import Path
import pyarrow.parquet as pq
from lightning_data import optimize
from litdata import optimize
from tokenizer import Tokenizer
from functools import partial

Expand Down
10 changes: 5 additions & 5 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
SPHINX_MOCK_REQUIREMENTS = int(os.environ.get("SPHINX_MOCK_REQUIREMENTS", True))

# alternative https://stackoverflow.com/a/67692/4521646
spec = spec_from_file_location("lightning_data/__about__.py", os.path.join(_PATH_ROOT, "lightning_data", "__about__.py"))
spec = spec_from_file_location("litdata/__about__.py", os.path.join(_PATH_ROOT, "litdata", "__about__.py"))
about = module_from_spec(spec)
spec.loader.exec_module(about)

Expand Down Expand Up @@ -316,8 +316,8 @@ def find_source():
fname = inspect.getsourcefile(obj)
# https://github.com/rtfd/readthedocs.org/issues/5735
if any(s in fname for s in ("readthedocs", "rtfd", "checkouts")):
# /home/docs/checkouts/readthedocs.org/user_builds/lightning_data/checkouts/
# devel/lightning_data/utilities/cls_experiment.py#L26-L176
# /home/docs/checkouts/readthedocs.org/user_builds/litdata/checkouts/
# devel/litdata/utilities/cls_experiment.py#L26-L176
path_top = os.path.abspath(os.path.join("..", "..", ".."))
fname = os.path.relpath(fname, start=path_top)
else:
Expand Down Expand Up @@ -380,8 +380,8 @@ def find_source():
import os
import torch
import lightning_data
from lightning_data import StreamingDataset
import litdata
from litdata import StreamingDataset
"""
coverage_skip_undoc_in_source = True
20 changes: 0 additions & 20 deletions lightning_data/__init__.py

This file was deleted.

File renamed without changes.
File renamed without changes.
20 changes: 20 additions & 0 deletions litdata/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Public API for the ``litdata`` package.

Re-exports the streaming dataset/dataloader classes and the data-processing
entry points so users can write ``from litdata import StreamingDataset``.
"""

from lightning_utilities.core.imports import RequirementCache

from litdata.__about__ import *  # noqa: F403
from litdata.processing.functions import map, optimize, walk
from litdata.streaming.combined import CombinedStreamingDataset
from litdata.streaming.dataloader import StreamingDataLoader
from litdata.streaming.dataset import StreamingDataset

__all__ = [
    "StreamingDataset",
    "CombinedStreamingDataset",
    "StreamingDataLoader",
    "map",
    "optimize",
    "walk",
]

# `Machine` is only available when the optional `lightning_sdk` package is
# installed; expose it conditionally.
if RequirementCache("lightning_sdk"):
    from lightning_sdk import Machine  # noqa: F401

    # BUG FIX: the original `__all__ + ["Machine"]` was a discarded
    # expression — it never mutated `__all__`. Use augmented assignment so
    # `Machine` is genuinely part of the public API.
    __all__ += ["Machine"]
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,22 @@
import torch
from tqdm.auto import tqdm as _tqdm

from lightning_data.constants import (
from litdata.constants import (
_BOTO3_AVAILABLE,
_DEFAULT_FAST_DEV_RUN_ITEMS,
_INDEX_FILENAME,
_IS_IN_STUDIO,
_LIGHTNING_CLOUD_LATEST,
_TORCH_GREATER_EQUAL_2_1_0,
)
from lightning_data.processing.readers import BaseReader
from lightning_data.processing.utilities import _create_dataset
from lightning_data.streaming import Cache
from lightning_data.streaming.cache import Dir
from lightning_data.streaming.client import S3Client
from lightning_data.streaming.resolver import _resolve_dir
from lightning_data.utilities.broadcast import broadcast_object
from lightning_data.utilities.packing import _pack_greedily
from litdata.processing.readers import BaseReader
from litdata.processing.utilities import _create_dataset
from litdata.streaming import Cache
from litdata.streaming.cache import Dir
from litdata.streaming.client import S3Client
from litdata.streaming.resolver import _resolve_dir
from litdata.utilities.broadcast import broadcast_object
from litdata.utilities.packing import _pack_greedily

if _TORCH_GREATER_EQUAL_2_1_0:
from torch.utils._pytree import tree_flatten, tree_unflatten, treespec_loads
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@

import torch

from lightning_data.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
from lightning_data.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
from lightning_data.processing.readers import BaseReader
from lightning_data.processing.utilities import optimize_dns_context
from lightning_data.streaming.resolver import (
from litdata.constants import _IS_IN_STUDIO, _TORCH_GREATER_EQUAL_2_1_0
from litdata.processing.data_processor import DataChunkRecipe, DataProcessor, DataTransformRecipe
from litdata.processing.readers import BaseReader
from litdata.processing.utilities import optimize_dns_context
from litdata.streaming.resolver import (
Dir,
_assert_dir_has_index_file,
_assert_dir_is_empty,
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from subprocess import DEVNULL, Popen
from typing import Any, Callable, List, Optional, Tuple, Union

from lightning_data.constants import _IS_IN_STUDIO, _LIGHTNING_CLOUD_LATEST
from litdata.constants import _IS_IN_STUDIO, _LIGHTNING_CLOUD_LATEST

if _LIGHTNING_CLOUD_LATEST:
from lightning_cloud.openapi import (
Expand Down Expand Up @@ -132,7 +132,7 @@ def optimize_dns(enable: bool) -> None:
):
cmd = (
f"sudo /home/zeus/miniconda3/envs/cloudspace/bin/python"
f" -c 'from lightning_data.processing.utilities import _optimize_dns; _optimize_dns({enable})'"
f" -c 'from litdata.processing.utilities import _optimize_dns; _optimize_dns({enable})'"
)
Popen(cmd, shell=True, stdout=DEVNULL, stderr=DEVNULL).wait() # E501

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from lightning_data.streaming.cache import Cache
from lightning_data.streaming.combined import CombinedStreamingDataset
from lightning_data.streaming.dataloader import StreamingDataLoader
from lightning_data.streaming.dataset import StreamingDataset
from lightning_data.streaming.item_loader import TokensLoader
from litdata.streaming.cache import Cache
from litdata.streaming.combined import CombinedStreamingDataset
from litdata.streaming.dataloader import StreamingDataLoader
from litdata.streaming.dataset import StreamingDataset
from litdata.streaming.item_loader import TokensLoader

__all__ = [
"Cache",
Expand Down
18 changes: 9 additions & 9 deletions lightning_data/streaming/cache.py → litdata/streaming/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@
import os
from typing import Any, Dict, List, Optional, Tuple, Union

from lightning_data.constants import (
from litdata.constants import (
_INDEX_FILENAME,
_LIGHTNING_CLOUD_LATEST,
_TORCH_GREATER_EQUAL_2_1_0,
)
from lightning_data.streaming.item_loader import BaseItemLoader
from lightning_data.streaming.reader import BinaryReader
from lightning_data.streaming.resolver import Dir, _resolve_dir
from lightning_data.streaming.sampler import ChunkedIndex
from lightning_data.streaming.serializers import Serializer
from lightning_data.streaming.writer import BinaryWriter
from lightning_data.utilities.env import _DistributedEnv, _WorkerEnv
from lightning_data.utilities.format import _convert_bytes_to_int
from litdata.streaming.item_loader import BaseItemLoader
from litdata.streaming.reader import BinaryReader
from litdata.streaming.resolver import Dir, _resolve_dir
from litdata.streaming.sampler import ChunkedIndex
from litdata.streaming.serializers import Serializer
from litdata.streaming.writer import BinaryWriter
from litdata.utilities.env import _DistributedEnv, _WorkerEnv
from litdata.utilities.format import _convert_bytes_to_int

logger = logging.Logger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from time import time
from typing import Any, Optional

from lightning_data.constants import _BOTO3_AVAILABLE
from litdata.constants import _BOTO3_AVAILABLE

if _BOTO3_AVAILABLE:
import boto3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

from torch.utils.data import IterableDataset

from lightning_data.streaming.dataset import StreamingDataset
from lightning_data.utilities.env import _WorkerEnv
from litdata.streaming.dataset import StreamingDataset
from litdata.utilities.env import _WorkerEnv

__NUM_SAMPLES_YIELDED_KEY__ = "__NUM_SAMPLES_YIELDED__"
__SAMPLES_KEY__ = "__SAMPLES__"
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
import os
from typing import Any, Dict, List, Optional, Tuple

from lightning_data.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
from lightning_data.streaming.downloader import get_downloader_cls
from lightning_data.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader
from lightning_data.streaming.sampler import ChunkedIndex
from lightning_data.streaming.serializers import Serializer
from litdata.constants import _INDEX_FILENAME, _TORCH_GREATER_EQUAL_2_1_0
from litdata.streaming.downloader import get_downloader_cls
from litdata.streaming.item_loader import BaseItemLoader, PyTreeLoader, TokensLoader
from litdata.streaming.sampler import ChunkedIndex
from litdata.streaming.serializers import Serializer

if _TORCH_GREATER_EQUAL_2_1_0:
from torch.utils._pytree import tree_unflatten, treespec_loads
Expand Down
Loading

0 comments on commit 1368f64

Please sign in to comment.